···
-
io::{self, Cursor, Write},
ops::{Bound, Deref, RangeBounds},
-
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering as AtomicOrdering},
···
use fjall::{Config, Keyspace, Partition, PartitionCreateOptions, Slice};
use itertools::{Either, Itertools};
use ordered_varint::Variable;
use rkyv::{Archive, Deserialize, Serialize, rancor::Error};
use tokio::sync::broadcast;
-
use tokio_util::bytes::{self, BufMut};
db::block::{ReadVariableExt, WriteVariableExt},
···
pub struct LexiconHandle {
buf: Arc<scc::Queue<EventRecord>>,
// `buf_len` is tracked separately because scc::Queue has no O(1) length query
-
buf_len: AtomicUsize, // relaxed
last_insert: AtomicU64, // relaxed
···
let opts = PartitionCreateOptions::default().compression(fjall::CompressionType::Miniz(9));
tree: keyspace.open_partition(nsid, opts).unwrap(),
buf_len: AtomicUsize::new(0),
last_insert: AtomicU64::new(0),
···
fn item_count(&self) -> usize {
-
self.buf_len.load(AtomicOrdering::Relaxed)
fn since_last_activity(&self) -> u64 {
···
fn insert(&self, event: EventRecord) {
-
self.buf_len.fetch_add(1, AtomicOrdering::Relaxed);
self.last_insert.store(CLOCK.raw(), AtomicOrdering::Relaxed);
-
fn sync(&self, max_block_size: usize) -> AppResult<usize> {
size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>();
let mut writer = ItemEncoder::new(WritableByteView::with_size(buf_size));
···
if let (Some(start_timestamp), Some(end_timestamp)) = (start_timestamp, end_timestamp) {
-
self.buf_len.store(0, AtomicOrdering::Relaxed);
let value = writer.finish()?;
-
let mut key = Vec::with_capacity(size_of::<u64>() * 2);
key.write_varint(start_timestamp)?;
key.write_varint(end_timestamp)?;
-
self.tree.insert(key, value.into_inner())?;
···
hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>>,
-
syncpool: threadpool::ThreadPool,
event_broadcaster: broadcast::Sender<(SmolStr, NsidCounts)>,
-
shutting_down: AtomicBool,
-
pub fn new(path: impl AsRef<Path>) -> AppResult<Self> {
tracing::info!("opening db...");
let ks = Config::new(path)
.cache_size(8 * 1024 * 1024) // from talna
hits: Default::default(),
-
syncpool: threadpool::Builder::new().num_threads(256).build(),
counts: ks.open_partition(
PartitionCreateOptions::default().compression(fjall::CompressionType::None),
···
event_broadcaster: broadcast::channel(1000).0,
eps: RateTracker::new(Duration::from_secs(1)),
-
shutting_down: AtomicBool::new(false),
max_last_activity: Duration::from_secs(10).as_nanos() as u64,
-
pub fn shutdown(&self) -> AppResult<()> {
-
self.shutting_down.store(true, AtomicOrdering::Release);
pub fn is_shutting_down(&self) -> bool {
-
self.shutting_down.load(AtomicOrdering::Acquire)
pub fn sync(&self, all: bool) -> AppResult<()> {
-
let mut execs = Vec::with_capacity(self.hits.len());
let _guard = scc::ebr::Guard::new();
-
for (nsid, tree) in self.hits.iter(&_guard) {
-
let count = tree.item_count();
-
let is_max_block_size = count > self.min_block_size.max(tree.suggested_block_size());
-
let is_too_old = tree.since_last_activity() > self.max_last_activity;
-
if count > 0 && (all || is_max_block_size || is_too_old) {
-
let nsid = nsid.clone();
-
let tree = tree.clone();
-
let max_block_size = self.max_block_size;
-
let synced = match tree.sync(max_block_size) {
-
tracing::error!("failed to sync {nsid}: {err}");
-
tracing::info!("synced {synced} of {nsid} to db");
-
self.syncpool.execute(exec);
···
ops::{Bound, Deref, RangeBounds},
+
atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering},
···
use fjall::{Config, Keyspace, Partition, PartitionCreateOptions, Slice};
use itertools::{Either, Itertools};
use ordered_varint::Variable;
+
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use rkyv::{Archive, Deserialize, Serialize, rancor::Error};
use tokio::sync::broadcast;
+
use tokio_util::sync::CancellationToken;
db::block::{ReadVariableExt, WriteVariableExt},
···
pub struct LexiconHandle {
buf: Arc<scc::Queue<EventRecord>>,
// `buf_len` is tracked separately because scc::Queue has no O(1) length query
+
buf_len: AtomicUsize, // seqcst
last_insert: AtomicU64, // relaxed
···
let opts = PartitionCreateOptions::default().compression(fjall::CompressionType::Miniz(9));
tree: keyspace.open_partition(nsid, opts).unwrap(),
buf_len: AtomicUsize::new(0),
last_insert: AtomicU64::new(0),
···
fn item_count(&self) -> usize {
+
self.buf_len.load(AtomicOrdering::SeqCst)
fn since_last_activity(&self) -> u64 {
···
fn insert(&self, event: EventRecord) {
+
self.buf_len.fetch_add(1, AtomicOrdering::SeqCst);
self.last_insert.store(CLOCK.raw(), AtomicOrdering::Relaxed);
+
fn encode_block(&self, max_block_size: usize) -> AppResult<Option<Block>> {
size_of::<u64>() + self.item_count().min(max_block_size) * size_of::<(u64, NsidHit)>();
let mut writer = ItemEncoder::new(WritableByteView::with_size(buf_size));
···
if let (Some(start_timestamp), Some(end_timestamp)) = (start_timestamp, end_timestamp) {
+
self.buf_len.store(0, AtomicOrdering::SeqCst);
let value = writer.finish()?;
+
let mut key = WritableByteView::with_size(size_of::<u64>() * 2);
key.write_varint(start_timestamp)?;
key.write_varint(end_timestamp)?;
+
data: value.into_inner(),
···
hits: scc::HashIndex<SmolStr, Arc<LexiconHandle>>,
+
sync_pool: threadpool::ThreadPool,
event_broadcaster: broadcast::Sender<(SmolStr, NsidCounts)>,
+
cancel_token: CancellationToken,
+
pub fn new(path: impl AsRef<Path>, cancel_token: CancellationToken) -> AppResult<Self> {
tracing::info!("opening db...");
let ks = Config::new(path)
.cache_size(8 * 1024 * 1024) // from talna
hits: Default::default(),
+
sync_pool: threadpool::Builder::new()
+
.num_threads(rayon::current_num_threads() * 2)
counts: ks.open_partition(
PartitionCreateOptions::default().compression(fjall::CompressionType::None),
···
event_broadcaster: broadcast::channel(1000).0,
eps: RateTracker::new(Duration::from_secs(1)),
max_last_activity: Duration::from_secs(10).as_nanos() as u64,
+
pub fn shutting_down(&self) -> impl Future<Output = ()> {
+
self.cancel_token.cancelled()
pub fn is_shutting_down(&self) -> bool {
+
self.cancel_token.is_cancelled()
pub fn sync(&self, all: bool) -> AppResult<()> {
+
// prepare all the data
+
let mut data = Vec::with_capacity(self.hits.len());
let _guard = scc::ebr::Guard::new();
+
for (_, handle) in self.hits.iter(&_guard) {
+
.min(self.min_block_size.max(handle.suggested_block_size()));
+
let count = handle.item_count();
+
let data_count = count / block_size;
+
let is_too_old = handle.since_last_activity() > self.max_last_activity;
+
if count > 0 && (all || data_count > 0 || is_too_old) {
+
for i in 0..data_count {
+
data.push((i, handle.clone(), block_size));
+
// only sync remainder if we haven't met block size
+
let remainder = count % block_size;
+
if data_count == 0 && remainder > 0 {
+
data.push((data_count, handle.clone(), remainder));
+
let mut blocks = Vec::with_capacity(data.len());
+
.map(|(i, handle, max_block_size)| {
+
.encode_block(max_block_size)
+
.map(|r| r.map(|block| (i, block, handle.clone())))
+
.collect_into_vec(&mut blocks);
+
for item in blocks.into_iter() {
+
let Some((i, block, handle)) = item.transpose()? else {
+
.execute(move || match handle.tree.insert(block.key, block.data) {
+
tracing::info!("[{i}] synced {} of {} to db", block.written, handle.nsid)
+
Err(err) => tracing::error!("failed to sync block: {}", err),