Tracks lexicons and how many times each has appeared on the Jetstream.

feat(server): use snapshots for reading from handles to not block writes, updating every sync

ptr.pet 9da170a1 a050efab

verified
Changed files
+147 -37
server
+7
server/Cargo.lock
···
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "async-compression"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"axum",
"axum-tws",
···
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
+
name = "arc-swap"
+
version = "1.7.1"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+
+
[[package]]
name = "async-compression"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
version = "0.1.0"
dependencies = [
"anyhow",
+
"arc-swap",
"async-trait",
"axum",
"axum-tws",
+1
server/Cargo.toml
···
parking_lot = { version = "0.12", features = ["send_guard", "hardware-lock-elision"] }
rclite = "0.2.7"
snmalloc-rs = "0.3.8"
···
parking_lot = { version = "0.12", features = ["send_guard", "hardware-lock-elision"] }
rclite = "0.2.7"
snmalloc-rs = "0.3.8"
+
arc-swap = "1.7.1"
+36 -17
server/src/db/handle.rs
···
use std::{
fmt::Debug,
io::Cursor,
-
ops::{Bound, Deref, RangeBounds},
sync::atomic::{AtomicU64, Ordering as AtomicOrdering},
time::Duration,
};
use byteview::ByteView;
-
use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice};
use itertools::Itertools;
use parking_lot::Mutex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
···
use crate::{
db::{EventRecord, NsidHit, block},
-
error::AppResult,
-
utils::{CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt, varints_unsigned_encoded},
};
pub type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>;
···
}
pub struct LexiconHandle {
-
tree: Partition,
nsid: SmolStr,
buf: Arc<Mutex<Vec<EventRecord>>>,
last_insert: AtomicU64, // relaxed
···
}
}
-
impl Deref for LexiconHandle {
-
type Target = Partition;
-
-
fn deref(&self) -> &Self::Target {
-
&self.tree
-
}
-
}
-
impl LexiconHandle {
pub fn new(keyspace: &Keyspace, nsid: &str) -> Self {
let opts = PartitionCreateOptions::default()
.block_size(1024 * 48)
.compression(fjall::CompressionType::Miniz(9));
Self {
-
tree: keyspace.open_partition(nsid, opts).unwrap(),
nsid: nsid.into(),
buf: Default::default(),
last_insert: AtomicU64::new(0),
···
}
}
pub fn span(&self) -> tracing::Span {
tracing::info_span!("handle", nsid = %self.nsid)
}
pub fn nsid(&self) -> &SmolStr {
&self.nsid
}
pub fn item_count(&self) -> usize {
self.buf.lock().len()
}
···
let end_key = varints_unsigned_encoded([end_limit]);
let blocks_to_compact = self
-
.tree
.range(start_key..end_key)
.collect::<Result<Vec<_>, _>>()?;
if blocks_to_compact.len() < 2 {
···
let end_blocks_size = new_blocks.len();
for key in keys_to_delete {
-
self.tree.remove(key.clone())?;
}
for block in new_blocks {
-
self.tree.insert(block.key, block.data)?;
}
let reduction =
···
);
Ok(())
}
pub fn encode_block_from_items(
···
use std::{
fmt::Debug,
io::Cursor,
+
ops::{Bound, RangeBounds},
sync::atomic::{AtomicU64, Ordering as AtomicOrdering},
time::Duration,
};
use byteview::ByteView;
+
use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice, Snapshot};
use itertools::Itertools;
use parking_lot::Mutex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
···
use crate::{
db::{EventRecord, NsidHit, block},
+
error::{AppError, AppResult},
+
utils::{
+
ArcRefCnt, ArcliteSwap, CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt,
+
varints_unsigned_encoded,
+
},
};
pub type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>;
···
}
pub struct LexiconHandle {
+
write_tree: Partition,
+
read_tree: ArcliteSwap<Snapshot>,
nsid: SmolStr,
buf: Arc<Mutex<Vec<EventRecord>>>,
last_insert: AtomicU64, // relaxed
···
}
}
impl LexiconHandle {
pub fn new(keyspace: &Keyspace, nsid: &str) -> Self {
let opts = PartitionCreateOptions::default()
.block_size(1024 * 48)
.compression(fjall::CompressionType::Miniz(9));
+
let write_tree = keyspace.open_partition(nsid, opts).unwrap();
+
let read_tree = ArcliteSwap::new(ArcRefCnt::new(write_tree.snapshot()));
Self {
+
write_tree,
+
read_tree,
nsid: nsid.into(),
buf: Default::default(),
last_insert: AtomicU64::new(0),
···
}
}
+
#[inline(always)]
+
pub fn read(&self) -> arc_swap::Guard<ArcRefCnt<Snapshot>> {
+
self.read_tree.load()
+
}
+
+
#[inline(always)]
+
pub fn update_tree(&self) {
+
self.read_tree
+
.store(ArcRefCnt::new(self.write_tree.snapshot()));
+
}
+
+
#[inline(always)]
pub fn span(&self) -> tracing::Span {
tracing::info_span!("handle", nsid = %self.nsid)
}
+
#[inline(always)]
pub fn nsid(&self) -> &SmolStr {
&self.nsid
}
+
#[inline(always)]
pub fn item_count(&self) -> usize {
self.buf.lock().len()
}
···
let end_key = varints_unsigned_encoded([end_limit]);
let blocks_to_compact = self
+
.read()
.range(start_key..end_key)
.collect::<Result<Vec<_>, _>>()?;
if blocks_to_compact.len() < 2 {
···
let end_blocks_size = new_blocks.len();
for key in keys_to_delete {
+
self.write_tree.remove(key.clone())?;
}
for block in new_blocks {
+
self.write_tree.insert(block.key, block.data)?;
}
let reduction =
···
);
Ok(())
+
}
+
+
pub fn insert_block(&self, block: Block) -> AppResult<()> {
+
self.write_tree
+
.insert(block.key, block.data)
+
.map_err(AppError::from)
}
pub fn encode_block_from_items(
+37 -20
server/src/db/mod.rs
···
use std::{
-
collections::HashMap,
fmt::Debug,
io::Cursor,
ops::{Bound, Deref, RangeBounds},
···
pub fn sync(&self, all: bool) -> AppResult<()> {
let start = CLOCK.now();
// prepare all the data
-
let mut data = Vec::with_capacity(self.hits.len());
let _guard = scc::ebr::Guard::new();
-
for (_, handle) in self.hits.iter(&_guard) {
let mut nsid_data = Vec::with_capacity(2);
let mut total_count = 0;
let is_too_old = handle.since_last_activity() > self.cfg.max_last_activity;
···
{blocks = %nsid_data.len(), count = %total_count},
"will encode & sync",
);
data.push(nsid_data);
}
}
···
for (block, handle) in chunk {
self.sync_pool.execute(move || {
let _span = handle.span().entered();
-
match handle.insert(block.key, block.data) {
Ok(_) => {
-
tracing::info!({count = %block.written}, "synced")
}
Err(err) => tracing::error!({ err = %err }, "failed to sync block"),
}
···
AppResult::Ok(())
})?;
self.sync_pool.join();
tracing::info!(time = %start.elapsed().as_secs_f64(), "synced all blocks");
Ok(())
···
let Some(handle) = self.get_handle(nsid) else {
return Ok(());
};
-
handle.compact(max_count, range, sort)
}
pub fn compact_all(
···
let Some(handle) = self.get_handle(&nsid) else {
continue;
};
-
let block_lens = handle.iter().rev().try_fold(Vec::new(), |mut acc, item| {
-
let (key, value) = item?;
-
let mut timestamps = Cursor::new(key);
-
let start_timestamp = timestamps.read_varint()?;
-
let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
-
acc.push(decoder.item_count());
-
AppResult::Ok(acc)
-
})?;
nsids.insert(nsid.to_smolstr(), block_lens);
}
Ok(DbInfo {
···
))
};
-
let (blocks, counted) = handle
.range(..end_key)
.map(|res| res.map_err(AppError::from))
.rev()
···
)
.into_inner();
-
tracing::info!(
-
"got blocks with size {}, item count {counted}",
-
blocks.len()
-
);
Either::Left(blocks.into_iter().rev().flatten().flatten())
}
···
let Some(handle) = self.get_handle("app.bsky.feed.like") else {
return Ok(0);
};
-
let Some((timestamps_raw, _)) = handle.first_key_value()? else {
return Ok(0);
};
let mut timestamp_reader = Cursor::new(timestamps_raw);
···
use std::{
+
collections::{HashMap, HashSet},
fmt::Debug,
io::Cursor,
ops::{Bound, Deref, RangeBounds},
···
pub fn sync(&self, all: bool) -> AppResult<()> {
let start = CLOCK.now();
// prepare all the data
+
let nsids_len = self.hits.len();
+
let mut data = Vec::with_capacity(nsids_len);
+
let mut nsids = HashSet::with_capacity(nsids_len);
let _guard = scc::ebr::Guard::new();
+
for (nsid, handle) in self.hits.iter(&_guard) {
let mut nsid_data = Vec::with_capacity(2);
let mut total_count = 0;
let is_too_old = handle.since_last_activity() > self.cfg.max_last_activity;
···
{blocks = %nsid_data.len(), count = %total_count},
"will encode & sync",
);
+
nsids.insert(nsid.clone());
data.push(nsid_data);
}
}
···
for (block, handle) in chunk {
self.sync_pool.execute(move || {
let _span = handle.span().entered();
+
let written = block.written;
+
match handle.insert_block(block) {
Ok(_) => {
+
tracing::info!({count = %written}, "synced")
}
Err(err) => tracing::error!({ err = %err }, "failed to sync block"),
}
···
AppResult::Ok(())
})?;
self.sync_pool.join();
+
+
// update snapshots for all (changed) handles
+
for nsid in nsids {
+
self.hits.peek_with(&nsid, |_, handle| handle.update_tree());
+
}
+
tracing::info!(time = %start.elapsed().as_secs_f64(), "synced all blocks");
Ok(())
···
let Some(handle) = self.get_handle(nsid) else {
return Ok(());
};
+
handle.compact(max_count, range, sort)?;
+
handle.update_tree();
+
Ok(())
}
pub fn compact_all(
···
let Some(handle) = self.get_handle(&nsid) else {
continue;
};
+
let block_lens = handle
+
.read()
+
.iter()
+
.rev()
+
.try_fold(Vec::new(), |mut acc, item| {
+
let (key, value) = item?;
+
let mut timestamps = Cursor::new(key);
+
let start_timestamp = timestamps.read_varint()?;
+
let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
+
acc.push(decoder.item_count());
+
AppResult::Ok(acc)
+
})?;
nsids.insert(nsid.to_smolstr(), block_lens);
}
Ok(DbInfo {
···
))
};
+
let (blocks, _counted) = handle
+
.read()
.range(..end_key)
.map(|res| res.map_err(AppError::from))
.rev()
···
)
.into_inner();
+
// tracing::info!(
+
// "got blocks with size {}, item count {counted}",
+
// blocks.len()
+
// );
Either::Left(blocks.into_iter().rev().flatten().flatten())
}
···
let Some(handle) = self.get_handle("app.bsky.feed.like") else {
return Ok(0);
};
+
let Some((timestamps_raw, _)) = handle.read().first_key_value()? else {
return Ok(0);
};
let mut timestamp_reader = Cursor::new(timestamps_raw);
+66
server/src/utils.rs
···
use std::io::{self, Read, Write};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use byteview::ByteView;
use ordered_varint::Variable;
pub fn get_time() -> Duration {
std::time::SystemTime::now()
···
}
}
}
···
use std::io::{self, Read, Write};
+
use std::ops::Deref;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
+
use arc_swap::RefCnt;
use byteview::ByteView;
use ordered_varint::Variable;
+
use rclite::Arc;
pub fn get_time() -> Duration {
std::time::SystemTime::now()
···
}
}
}
+
+
pub type ArcliteSwap<T> = arc_swap::ArcSwapAny<ArcRefCnt<T>>;
+
+
pub struct ArcRefCnt<T>(Arc<T>);
+
+
impl<T> ArcRefCnt<T> {
+
pub fn new(value: T) -> Self {
+
Self(Arc::new(value))
+
}
+
}
+
+
impl<T> Deref for ArcRefCnt<T> {
+
type Target = T;
+
+
fn deref(&self) -> &Self::Target {
+
&self.0
+
}
+
}
+
+
impl<T> Clone for ArcRefCnt<T> {
+
fn clone(&self) -> Self {
+
Self(self.0.clone())
+
}
+
}
+
+
// SAFETY: uhhhhhhhh copied the Arc impl from arc_swap xd
+
unsafe impl<T> RefCnt for ArcRefCnt<T> {
+
type Base = T;
+
+
fn into_ptr(me: Self) -> *mut Self::Base {
+
Arc::into_raw(me.0) as *mut T
+
}
+
+
fn as_ptr(me: &Self) -> *mut Self::Base {
+
// Slightly convoluted way to do this, but this avoids stacked borrows violations. The same
+
// intention as
+
//
+
// me as &T as *const T as *mut T
+
//
+
// We first create a "shallow copy" of me - one that doesn't really own its ref count
+
// (that's OK, me _does_ own it, so it can't be destroyed in the meantime).
+
// Then we can use into_raw (which preserves not having the ref count).
+
//
+
// We need to "revert" the changes we did. In current std implementation, the combination
+
// of from_raw and forget is no-op. But formally, into_raw shall be paired with from_raw
+
// and that read shall be paired with forget to properly "close the brackets". In future
+
// versions of STD, these may become something else that's not really no-op (unlikely, but
+
// possible), so we future-proof it a bit.
+
+
// SAFETY: &T cast to *const T will always be aligned, initialised and valid for reads
+
let ptr = Arc::into_raw(unsafe { std::ptr::read(&me.0) });
+
let ptr = ptr as *mut T;
+
+
// SAFETY: We got the pointer from into_raw just above
+
std::mem::forget(unsafe { Arc::from_raw(ptr) });
+
+
ptr
+
}
+
+
unsafe fn from_ptr(ptr: *const Self::Base) -> Self {
+
Self(unsafe { Arc::from_raw(ptr) })
+
}
+
}