Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

big gc bigtime

previously: records were never deleted at all, so dangling records piled up

this probably takes a lot of memory: every record key referenced from the feeds is loaded into memory first

hopefully won't be needed again

Changed files
+107 -19
ufos
+22 -2
ufos/src/main.rs
···
use ufos::file_consumer;
use ufos::server;
use ufos::storage::{StorageWhatever, StoreBackground, StoreReader, StoreWriter};
-
use ufos::storage_fjall::FjallStorage;
use ufos::store_types::SketchSecretPrefix;
use ufos::{nice_duration, ConsumerInfo};
···
/// DEBUG: interpret jetstream as a file fixture
#[arg(long, action)]
jetstream_fixture: bool,
}
#[tokio::main]
···
args.data.clone(),
jetstream,
args.jetstream_force,
-
Default::default(),
)?;
go(args, read_store, write_store, cursor, sketch_secret).await?;
Ok(())
}
···
use ufos::file_consumer;
use ufos::server;
use ufos::storage::{StorageWhatever, StoreBackground, StoreReader, StoreWriter};
+
use ufos::storage_fjall::{FjallConfig, FjallStorage};
use ufos::store_types::SketchSecretPrefix;
use ufos::{nice_duration, ConsumerInfo};
···
/// DEBUG: interpret jetstream as a file fixture
#[arg(long, action)]
jetstream_fixture: bool,
+
/// HOPEFULLY only needed once
+
///
+
/// brute-force garbage-collect all dangling records because we weren't deleting
+
/// them before at all (oops)
+
#[arg(long, action)]
+
fjall_records_gc: bool,
}
#[tokio::main]
···
args.data.clone(),
jetstream,
args.jetstream_force,
+
FjallConfig {
+
major_compact: !args.fjall_records_gc,
+
},
)?;
+
+
if args.fjall_records_gc {
+
log::info!("beginning brute-force records gc");
+
let t0 = std::time::Instant::now();
+
let (n, m) = write_store.records_brute_gc_danger()?;
+
let dt = t0.elapsed();
+
log::info!(
+
"completed brute-force records gc in {dt:?}, removed {n} and retained {m} records."
+
);
+
return Ok(());
+
}
+
go(args, read_store, write_store, cursor, sketch_secret).await?;
Ok(())
}
+85 -17
ufos/src/storage_fjall.rs
···
/// this is only meant for tests
#[cfg(test)]
pub temp: bool,
}
impl StorageWhatever<FjallReader, FjallWriter, FjallBackground, FjallConfig> for FjallStorage {
···
path: impl AsRef<Path>,
endpoint: String,
force_endpoint: bool,
-
_config: FjallConfig,
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>, SketchSecretPrefix)> {
let keyspace = {
let config = Config::new(path);
···
sketch_secret
};
-
for (partition, name) in [
-
(&global, "global"),
-
(&feeds, "feeds"),
-
(&records, "records"),
-
(&rollups, "rollups"),
-
(&queues, "queues"),
-
] {
-
let size0 = partition.disk_space();
-
log::info!("beggining major compaction for {name} (original size: {size0})");
-
let t0 = Instant::now();
-
partition.major_compact().expect("compact better work 😬");
-
let dt = t0.elapsed();
-
let sizef = partition.disk_space();
-
let dsize = (sizef as i64) - (size0 as i64);
-
log::info!("completed compaction for {name} in {dt:?} (new size: {sizef}, {dsize})");
}
let reader = FjallReader {
···
batch.commit()?;
Ok((cursors_advanced, dirty_nsids))
}
}
impl StoreWriter<FjallBackground> for FjallWriter {
···
tempfile::tempdir().unwrap(),
"offline test (no real jetstream endpoint)".to_string(),
false,
-
FjallConfig { temp: true },
)
.unwrap();
(read, write)
···
/// this is only meant for tests
#[cfg(test)]
pub temp: bool,
+
/// do major compaction on startup
+
///
+
/// default is false. compacting on startup is probably a good thing unless it's too slow.
+
pub major_compact: bool,
}
impl StorageWhatever<FjallReader, FjallWriter, FjallBackground, FjallConfig> for FjallStorage {
···
path: impl AsRef<Path>,
endpoint: String,
force_endpoint: bool,
+
config: FjallConfig,
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>, SketchSecretPrefix)> {
let keyspace = {
let config = Config::new(path);
···
sketch_secret
};
+
// Major compaction on startup is opt-in via `FjallConfig::major_compact`. When enabled,
// every partition is compacted in turn and its on-disk size is logged before and after
// so the effect of the compaction is visible in the logs.
if config.major_compact {
    for (partition, name) in [
        (&global, "global"),
        (&feeds, "feeds"),
        (&records, "records"),
        (&rollups, "rollups"),
        (&queues, "queues"),
    ] {
        let size0 = partition.disk_space();
        log::info!("beginning major compaction for {name} (original size: {size0})");
        let t0 = Instant::now();
        // A failed compaction aborts startup rather than continuing with the store.
        partition.major_compact().expect("compact better work 😬");
        let dt = t0.elapsed();
        let sizef = partition.disk_space();
        // Signed difference: negative means the partition shrank.
        let dsize = (sizef as i64) - (size0 as i64);
        log::info!(
            "completed compaction for {name} in {dt:?} (new size: {sizef}, {dsize})"
        );
    }
} else {
    log::info!("skipping major compaction on startup");
}
let reader = FjallReader {
···
batch.commit()?;
Ok((cursors_advanced, dirty_nsids))
}
+
pub fn records_brute_gc_danger(&self) -> StorageResult<(usize, usize)> {
+
let (mut removed, mut retained) = (0, 0);
+
let mut to_retain = HashSet::<Vec<u8>>::new();
+
+
// Partition: 'feed'
+
//
+
// - Per-collection list of record references ordered by jetstream cursor
+
// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor)
+
// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.)
+
//
+
//
+
// Partition: 'records'
+
//
+
// - Actual records by their atproto location
+
// - key: nullstr || nullstr || nullstr (did, collection, rkey)
+
// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record)
+
//
+
//
+
+
log::warn!("loading *all* record keys from feed into memory (yikes)");
+
let t0 = Instant::now();
+
for (i, kv) in self.feeds.iter().enumerate() {
+
if i > 0 && (i % 100000 == 0) {
+
log::info!("{i}...");
+
}
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
+
let val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
+
let record_key: RecordLocationKey = (&key, &val).into();
+
to_retain.insert(record_key.to_db_bytes()?);
+
}
+
log::warn!(
+
"loaded. wow. took {:?}, found {} keys",
+
t0.elapsed(),
+
to_retain.len()
+
);
+
+
log::warn!("warmup OVER, iterating some billions of record keys now");
+
let t0 = Instant::now();
+
for (i, k) in self.records.keys().enumerate() {
+
let key_bytes = k?;
+
if to_retain.contains(&*key_bytes) {
+
retained += 1;
+
} else {
+
self.records.remove(key_bytes)?;
+
removed += 1;
+
}
+
if i > 0 && (i % 10_000_000) == 0 {
+
log::info!("{i}: {retained} retained, {removed} removed.");
+
}
+
}
+
log::warn!("whew! that took {:?}", t0.elapsed());
+
+
Ok((removed, retained))
+
}
}
impl StoreWriter<FjallBackground> for FjallWriter {
···
tempfile::tempdir().unwrap(),
"offline test (no real jetstream endpoint)".to_string(),
false,
+
FjallConfig {
+
temp: true,
+
..Default::default()
+
},
)
.unwrap();
(read, write)