Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

Merge pull request #12 from at-microcosm/ufos-new-data-model

Ufos new data model

+205 -110
Cargo.lock
···
checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f"
[[package]]
+name = "arbitrary"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
+
+[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
[[package]]
name = "atrium-api"
-version = "0.25.0"
+version = "0.25.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea3ea578c768ec91082e424a8d139517b2cb5c75149bf3cec04371a1e74f00f2"
+checksum = "0d4eb9b4787aba546015c8ccda1d3924c157cee13d67848997fba74ac8144a07"
dependencies = [
"atrium-common",
"atrium-xrpc",
···
[[package]]
name = "atrium-common"
-version = "0.1.0"
+version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168e558408847bfed69df1033a32fd051f7a037ebc90ea46e588ccb2bfbd7233"
+checksum = "ba30d2f9e1a8b3db8fc97d0a5f91ee5a28f8acdddb771ad74c1b08eda357ca3d"
dependencies = [
"dashmap",
"lru",
···
[[package]]
name = "atrium-xrpc"
-version = "0.12.1"
+version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b4956d94147cfbb669c68f654eb4fd6a1d00648c810cec79d04ec5425b8f378"
+checksum = "18a9e526cb2ed3e0a2ca78c3ce2a943d9041a68e067dadf42923b523771e07df"
dependencies = [
"http",
"serde",
···
[[package]]
name = "axum"
-version = "0.8.1"
+version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
+checksum = "de45108900e1f9b9242f7f2e254aa3e2c029c921c258fe9e6b4217eeebd54288"
dependencies = [
"axum-core",
"bytes",
···
[[package]]
name = "axum-core"
-version = "0.5.0"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
+checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
dependencies = [
"bytes",
-"futures-util",
+"futures-core",
"http",
"http-body",
"http-body-util",
···
[[package]]
name = "axum-extra"
-version = "0.10.0"
+version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
+checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d"
dependencies = [
"axum",
"axum-core",
···
"http-body-util",
"mime",
"pin-project-lite",
+"rustversion",
"serde",
"tower",
"tower-layer",
···
[[package]]
name = "byteview"
-version = "0.5.4"
+version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a4516a8561bff0598c45512f90ee04ed62cee2cb36839e650a0a0704d5f741f"
+checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5"
[[package]]
name = "bzip2-sys"
···
]
[[package]]
+name = "cardinality-estimator"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ae5e12c435064f9e8ec53c5a782ca9a362702a4863fe1b6448f524ecede8fe3"
+dependencies = [
+"enum_dispatch",
+"serde",
+"wyhash",
+]
+
+[[package]]
name = "cc"
-version = "1.2.17"
+version = "1.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a"
+checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c"
dependencies = [
"jobserver",
"libc",
···
[[package]]
name = "clap"
-version = "4.5.33"
+version = "4.5.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2c80cae4c3350dd8f1272c73e83baff9a6ba550b8bfbe651b3c45b78cd1751e"
+checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944"
dependencies = [
"clap_builder",
"clap_derive",
···
[[package]]
name = "clap_builder"
-version = "4.5.33"
+version = "4.5.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0123e386f691c90aa228219b5b1ee72d465e8e231c79e9c82324f016a62a741c"
+checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9"
dependencies = [
"anstream",
"anstyle",
···
[[package]]
name = "ctrlc"
-version = "3.4.5"
+version = "3.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3"
+checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c"
dependencies = [
"nix",
"windows-sys 0.59.0",
···
[[package]]
name = "darling"
-version = "0.20.10"
+version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
···
[[package]]
name = "darling_core"
-version = "0.20.10"
+version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
dependencies = [
"fnv",
"ident_case",
···
[[package]]
name = "darling_macro"
-version = "0.20.10"
+version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
···
[[package]]
name = "deranged"
-version = "0.4.1"
+version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058"
+checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
dependencies = [
"powerfmt",
"serde",
···
"dropshot_endpoint",
"form_urlencoded",
"futures",
-"hostname 0.4.0",
+"hostname 0.4.1",
"http",
"http-body-util",
"hyper",
"hyper-util",
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"multer",
"openapiv3",
"paste",
···
[[package]]
name = "env_logger"
-version = "0.11.7"
+version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697"
+checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
dependencies = [
"anstream",
"anstyle",
···
[[package]]
name = "errno"
-version = "0.3.10"
+version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
+checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e"
dependencies = [
"libc",
"windows-sys 0.59.0",
···
[[package]]
name = "event-listener-strategy"
-version = "0.5.3"
+version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2"
+checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [
"event-listener",
"pin-project-lite",
···
[[package]]
name = "fjall"
-version = "2.7.0"
-source = "git+https://github.com/fjall-rs/fjall.git?branch=fix%2Flockless-ranges#d2102006958b0b30bdde0f7315b9b22539bb5f89"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26b2ced3483989a62b3533c9f99054d73b527c6c0045cf22b00fe87956f1a46f"
dependencies = [
"byteorder",
"byteview",
···
"libc",
"log",
"rustversion",
-"windows 0.58.0",
+"windows",
[[package]]
···
"futures-core",
"futures-sink",
"http",
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"slab",
"tokio",
"tokio-util",
···
[[package]]
name = "hostname"
-version = "0.4.0"
+version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
+checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65"
dependencies = [
"cfg-if",
"libc",
-"windows 0.52.0",
+"windows-link",
[[package]]
···
[[package]]
name = "hyper-util"
-version = "0.1.10"
+version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4"
+checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2"
dependencies = [
"bytes",
"futures-channel",
···
"http",
"http-body",
"hyper",
+"libc",
"pin-project-lite",
"socket2",
"tokio",
···
[[package]]
name = "iana-time-zone"
-version = "0.1.62"
+version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127"
+checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
···
"js-sys",
"log",
"wasm-bindgen",
-"windows-core 0.52.0",
+"windows-core 0.61.0",
[[package]]
···
[[package]]
name = "indexmap"
-version = "2.8.0"
+version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058"
+checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
···
[[package]]
name = "jiff"
-version = "0.2.5"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260"
+checksum = "1f33145a5cbea837164362c7bd596106eb7c5198f97d1ba6f6ebb3223952e488"
dependencies = [
"jiff-static",
"log",
···
[[package]]
name = "jiff-static"
-version = "0.2.5"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c"
+checksum = "43ce13c40ec6956157a3635d97a1ee2df323b263f09ea14165131289cb0f5c19"
dependencies = [
"proc-macro2",
"quote",
···
[[package]]
name = "jobserver"
-version = "0.1.32"
+version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
+checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
dependencies = [
+"getrandom 0.3.2",
"libc",
···
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
+name = "libfuzzer-sys"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75"
+dependencies = [
+"arbitrary",
+"cc",
+]
+
+[[package]]
name = "libloading"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
[[package]]
name = "linux-raw-sys"
-version = "0.9.3"
+version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "litemap"
···
[[package]]
name = "lsm-tree"
-version = "2.7.0"
-source = "git+https://github.com/fjall-rs/lsm-tree.git?branch=fix%2Flockless-ranges#c1684bdf57488a6195942fde5ea0c756dc0b6035"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a63a5e98a38b51765274137d8aedfbd848da5f4d016867e186b673fcc06a8c"
dependencies = [
"byteorder",
"crossbeam-skiplist",
···
"guardian",
"interval-heap",
"log",
+"lz4_flex",
"path-absolutize",
"quick_cache",
"rustc-hash 2.1.1",
···
"cc",
"libc",
+
+[[package]]
+name = "lz4_flex"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
[[package]]
name = "mach2"
···
"http-body-util",
"hyper",
"hyper-util",
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"ipnet",
"metrics",
"metrics-util",
···
"once_cell",
"procfs",
"rlimit",
-"windows 0.58.0",
+"windows",
[[package]]
···
[[package]]
name = "miniz_oxide"
-version = "0.8.5"
+version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
+checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a"
dependencies = [
"adler2",
···
[[package]]
name = "once_cell"
-version = "1.21.1"
+version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "openapiv3"
···
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c"
dependencies = [
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"serde",
"serde_json",
···
[[package]]
name = "openssl-src"
-version = "300.4.2+3.4.1"
+version = "300.5.0+3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168ce4e058f975fe43e89d9ccf78ca668601887ae736090aacc23ae353c298e2"
+checksum = "e8ce546f549326b0e6052b649198487d91320875da901e7bd11a06d1ee3f9c2f"
dependencies = [
"cc",
···
[[package]]
name = "redox_syscall"
-version = "0.5.10"
+version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1"
+checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
dependencies = [
"bitflags",
···
[[package]]
name = "rustix"
-version = "1.0.3"
+version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96"
+checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
dependencies = [
"bitflags",
"errno",
"libc",
-"linux-raw-sys 0.9.3",
+"linux-raw-sys 0.9.4",
"windows-sys 0.59.0",
···
checksum = "9d2de91cf02bbc07cde38891769ccd5d4f073d22a40683aa4bc7a95781aaa2c4"
dependencies = [
"form_urlencoded",
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"itoa",
"ryu",
"serde",
···
"chrono",
"hex",
"indexmap 1.9.3",
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"serde",
"serde_derive",
"serde_json",
···
[[package]]
name = "smallvec"
-version = "1.14.0"
+version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
+checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9"
[[package]]
name = "socket2"
-version = "0.5.8"
+version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
+checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef"
dependencies = [
"libc",
"windows-sys 0.52.0",
···
"fastrand",
"getrandom 0.3.2",
"once_cell",
-"rustix 1.0.3",
+"rustix 1.0.5",
"windows-sys 0.59.0",
···
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
dependencies = [
-"indexmap 2.8.0",
+"indexmap 2.9.0",
"serde",
"serde_spanned",
"toml_datetime",
···
version = "0.1.0"
dependencies = [
"anyhow",
+"async-trait",
"bincode 2.0.1",
+"cardinality-estimator",
"clap",
"dropshot",
"env_logger",
···
"semver",
"serde",
"serde_json",
+"tempfile",
"thiserror 2.0.12",
"tikv-jemallocator",
"tokio",
[[package]]
+name = "ufos-fuzz"
+version = "0.0.0"
+dependencies = [
+"bincode 2.0.1",
+"cardinality-estimator",
+"jetstream",
+"libfuzzer-sys",
+"tikv-jemallocator",
+"ufos",
+]
+
+[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
[[package]]
name = "value-log"
-version = "1.7.2"
+version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d65573c63cf768179763226edb8d614d8b314130a3f50422d6d375d3947c529f"
+checksum = "fd29b17c041f94e0885179637289815cd038f0c9fc19c4549d5a97017404fb7d"
dependencies = [
"byteorder",
-"bytes",
"byteview",
"interval-heap",
"log",
"path-absolutize",
-"quick_cache",
"rustc-hash 2.1.1",
"tempfile",
"varint-rs",
···
[[package]]
name = "windows"
-version = "0.52.0"
+version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
+checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
dependencies = [
-"windows-core 0.52.0",
+"windows-core 0.58.0",
"windows-targets",
[[package]]
-name = "windows"
+name = "windows-core"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
+checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
dependencies = [
-"windows-core 0.58.0",
+"windows-implement 0.58.0",
+"windows-interface 0.58.0",
+"windows-result 0.2.0",
+"windows-strings 0.1.0",
"windows-targets",
[[package]]
name = "windows-core"
-version = "0.52.0"
+version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980"
dependencies = [
-"windows-targets",
+"windows-implement 0.60.0",
+"windows-interface 0.59.1",
+"windows-link",
+"windows-result 0.3.2",
+"windows-strings 0.4.0",
[[package]]
-name = "windows-core"
+name = "windows-implement"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
+checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
dependencies = [
-"windows-implement",
-"windows-interface",
-"windows-result",
-"windows-strings",
-"windows-targets",
+"proc-macro2",
+"quote",
+"syn",
[[package]]
name = "windows-implement"
-version = "0.58.0"
+version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
+checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
···
[[package]]
+name = "windows-interface"
+version = "0.59.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
+dependencies = [
+"proc-macro2",
+"quote",
+"syn",
+]
+
+[[package]]
name = "windows-link"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
[[package]]
+name = "windows-result"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252"
+dependencies = [
+"windows-link",
+]
+
+[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
-"windows-result",
+"windows-result 0.2.0",
"windows-targets",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97"
+dependencies = [
+"windows-link",
[[package]]
···
[[package]]
name = "winnow"
-version = "0.7.4"
+version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36"
+checksum = "63d3fcd9bba44b03821e7d699eeee959f3126dcc4aa8e4ae18ec617c2a5cea10"
dependencies = [
"memchr",
···
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
+[[package]]
+name = "wyhash"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
+dependencies = [
+"rand_core 0.6.4",
+]
[[package]]
name = "xxhash-rust"
+1 -3
Cargo.toml
···
"constellation",
"jetstream",
"ufos",
+"ufos/fuzz",
]
-
-[patch.crates-io]
-lsm-tree = { git = "https://github.com/fjall-rs/lsm-tree.git", branch = "fix/lockless-ranges" }
+1 -1
constellation/templates/base.html.j2
···
</style>
</head>
<body class="{% block body_classes %}{% endblock %}">
-<h1><a href="/">This</a> is a <a href="https://github.com/at-ucosm/links/tree/main/constellation">constellation 🌌</a> API server from <a href="https://github.com/at-microcosm">microcosm</a> ✨</h1>
+<h1><a href="/">This</a> is a <a href="https://github.com/at-microcosm/links/tree/main/constellation">constellation 🌌</a> API server from <a href="https://github.com/at-microcosm">microcosm</a> ✨</h1>
{% block content %}{% endblock %}
<footer>
+2 -2
jetstream/Cargo.toml
···
[dependencies]
async-trait = "0.1.83"
-atrium-api = { version = "0.25", default-features = false, features = [
+atrium-api = { version = "0.25.2", default-features = false, features = [
"namespace-appbsky",
] }
tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
···
futures-util = "0.3.31"
url = "2.5.4"
serde = { version = "1.0.215", features = ["derive"] }
-serde_json = "1.0.132"
+serde_json = { version = "1.0.140", features = ["raw_value"] }
chrono = "0.4.38"
zstd = "0.13.2"
thiserror = "2.0.3"
+22 -5
jetstream/examples/arbitrary_record.rs
···
use clap::Parser;
use jetstream::{
events::{
-        commit::CommitEvent,
-        JetstreamEvent::Commit,
+        CommitOp,
+        EventKind,
+        JetstreamEvent,
},
DefaultJetstreamEndpoints,
JetstreamCompression,
···
let args = Args::parse();
let dids = args.did.unwrap_or_default();
-    let config: JetstreamConfig<serde_json::Value> = JetstreamConfig {
+    let config: JetstreamConfig = JetstreamConfig {
endpoint: DefaultJetstreamEndpoints::USEastOne.into(),
wanted_collections: vec![args.nsid.clone()],
wanted_dids: dids.clone(),
···
);
while let Some(event) = receiver.recv().await {
-        if let Commit(CommitEvent::CreateOrUpdate { commit, .. }) = event {
-            println!("got record: {:?}", commit.record);
+        if let JetstreamEvent {
+            kind: EventKind::Commit,
+            commit: Some(commit),
+            ..
+        } = event
+        {
+            if commit.collection != args.nsid {
+                continue;
+            }
+            if !(commit.operation == CommitOp::Create || commit.operation == CommitOp::Update) {
+                continue;
+            }
+            let Some(rec) = commit.record else { continue };
+            println!(
+                "New or updated record! ({})\n{:?}\n",
+                commit.rkey.as_str(),
+                rec.get()
+            );
}
}
+20 -31
jetstream/examples/basic.rs
···
use clap::Parser;
use jetstream::{
events::{
-        commit::{
-            CommitEvent,
-            CommitType,
-        },
-        JetstreamEvent::Commit,
+        CommitEvent,
+        CommitOp,
+        EventKind,
+        JetstreamEvent,
},
DefaultJetstreamEndpoints,
JetstreamCompression,
···
/// The DIDs to listen for events on, if not provided we will listen for all DIDs.
#[arg(short, long)]
did: Option<Vec<string::Did>>,
-    /// The NSID for the collection to listen for (e.g. `app.bsky.feed.post`).
-    #[arg(short, long)]
-    nsid: string::Nsid,
}
#[tokio::main]
···
let dids = args.did.unwrap_or_default();
let config = JetstreamConfig {
endpoint: DefaultJetstreamEndpoints::USEastOne.into(),
-        wanted_collections: vec![args.nsid.clone()],
+        wanted_collections: vec![string::Nsid::new("app.bsky.feed.post".to_string()).unwrap()],
wanted_dids: dids.clone(),
compression: JetstreamCompression::Zstd,
..Default::default()
···
let jetstream = JetstreamConnector::new(config)?;
let mut receiver = jetstream.connect().await?;
-    println!(
-        "Listening for '{}' events on DIDs: {:?}",
-        args.nsid.as_str(),
-        dids
-    );
+    println!("Listening for 'app.bsky.feed.post' events on DIDs: {dids:?}");
while let Some(event) = receiver.recv().await {
-        if let Commit(commit) = event {
-            match commit {
-                CommitEvent::CreateOrUpdate { info: _, commit }
-                    if commit.info.operation == CommitType::Create =>
-                {
-                    if let AppBskyFeedPost(record) = commit.record {
-                        println!(
-                            "New post created! ({})\n\n'{}'",
-                            commit.info.rkey.as_str(),
-                            record.text
-                        );
-                    }
-                }
-                CommitEvent::Delete { info: _, commit } => {
-                    println!("A post has been deleted. ({})", commit.rkey.as_str());
-                }
-                _ => {}
+        if let JetstreamEvent {
+            kind: EventKind::Commit,
+            commit:
+                Some(CommitEvent {
+                    operation: CommitOp::Create,
+                    rkey,
+                    record: Some(record),
+                    ..
+                }),
+            ..
+        } = event
+        {
+            if let Ok(AppBskyFeedPost(rec)) = serde_json::from_str(record.get()) {
+                println!("New post created! ({})\n{:?}\n", rkey.as_str(), rec.text);
}
}
}
+205
jetstream/src/events.rs
···
+use std::time::{
+    Duration,
+    SystemTime,
+    SystemTimeError,
+    UNIX_EPOCH,
+};
+
+use chrono::Utc;
+use serde::{
+    Deserialize,
+    Serialize,
+};
+use serde_json::value::RawValue;
+
+use crate::exports;
+
+/// Opaque wrapper for the time_us cursor used by jetstream
+#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)]
+pub struct Cursor(u64);
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct JetstreamEvent {
+    #[serde(rename = "time_us")]
+    pub cursor: Cursor,
+    pub did: exports::Did,
+    pub kind: EventKind,
+    pub commit: Option<CommitEvent>,
+    pub identity: Option<IdentityEvent>,
+    pub account: Option<AccountEvent>,
+}
+
+#[derive(Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum EventKind {
+    Commit,
+    Identity,
+    Account,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct CommitEvent {
+    pub collection: exports::Nsid,
+    pub rkey: exports::RecordKey,
+    pub rev: String,
+    pub operation: CommitOp,
+    pub record: Option<Box<RawValue>>,
+    pub cid: Option<exports::Cid>,
+}
+
+#[derive(Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum CommitOp {
+    Create,
+    Update,
+    Delete,
+}
+
+#[derive(Debug, Deserialize, PartialEq)]
+pub struct IdentityEvent {
+    pub did: exports::Did,
+    pub handle: Option<exports::Handle>,
+    pub seq: u64,
+    pub time: chrono::DateTime<Utc>,
+}
+
+#[derive(Debug, Deserialize, PartialEq)]
+pub struct AccountEvent {
+    pub active: bool,
+    pub did: exports::Did,
+    pub seq: u64,
+    pub time: chrono::DateTime<Utc>,
+    pub status: Option<String>,
+}
+
+impl Cursor {
+    /// Get a cursor that will consume all available jetstream replay
+    ///
+    /// This sets the cursor to zero.
+    ///
+    /// Jetstream instances typically only have a few days of replay.
+    pub fn from_start() -> Self {
+        Self(0)
+    }
+    /// Get a cursor for a specific time
+    ///
+    /// Panics: if t is older than the unix epoch: Jan 1, 1970.
+    ///
+    /// If you want to receive all available jetstream replay (typically a few days), use
+    /// .from_start()
+    ///
+    /// Warning: this exploits the internal implementation detail of jetstream cursors
+    /// being ~microsecond timestamps.
+    pub fn at(t: SystemTime) -> Self {
+        let unix_dt = t
+            .duration_since(UNIX_EPOCH)
+            .expect("cannot set jetstream cursor earlier than unix epoch");
+        Self(unix_dt.as_micros() as u64)
+    }
+    /// Get a cursor rewound from now by this amount
+    ///
+    /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970.
+    ///
+    /// Jetstream instances typically only have a few days of replay.
+    ///
+    /// Warning: this exploits the internal implementation detail of jetstream cursors
+    /// being ~microsecond timestamps.
+    pub fn back_by(d: Duration) -> Self {
+        Self::at(SystemTime::now() - d)
+    }
+    /// Get a Cursor from a raw u64
+    ///
+    /// For example, from a jetstream event's `time_us` field.
+    pub fn from_raw_u64(time_us: u64) -> Self {
+        Self(time_us)
+    }
+    /// Get the raw u64 value from this cursor.
+    pub fn to_raw_u64(&self) -> u64 {
+        self.0
+    }
+    /// Format the cursor value for use in a jetstream connection url querystring
+    pub fn to_jetstream(&self) -> String {
+        self.0.to_string()
+    }
+    /// Compute the time span since an earlier cursor or [SystemTime]
+    ///
+    /// Warning: this exploits the internal implementation detail of jetstream cursors
+    /// being ~microsecond timestamps.
+    pub fn duration_since(
+        &self,
+        earlier: impl Into<SystemTime>,
+    ) -> Result<Duration, SystemTimeError> {
+        let t: SystemTime = self.into();
+        t.duration_since(earlier.into())
+    }
+    /// Compute the age of the cursor vs the local clock
+    ///
+    /// Warning: this exploits the internal implementation detail of jetstream cursors
+    pub fn elapsed(&self) -> Result<Duration, SystemTimeError> {
+        let t: SystemTime = self.into();
+        t.elapsed()
+    }
+    /// Get the immediate next cursor value
+    ///
+    /// This is possible for the implementation of jetstream cursors
+    pub fn next(&self) -> Cursor {
+        Self(self.0 + 1)
+    }
+}
+
+impl From<&Cursor> for SystemTime {
+    /// Convert a cursor directly to a [SystemTime]
+    ///
+    /// Warning: this exploits the internal implementation detail of jetstream cursors
+    /// being ~microsecond timestamps.
+    fn from(c: &Cursor) -> Self {
+        UNIX_EPOCH + Duration::from_micros(c.0)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_parse_commit_event() -> anyhow::Result<()> {
+        let json = r#"{
+            "rev":"3llrdsginou2i",
+            "operation":"create",
+            "collection":"app.bsky.feed.post",
+            "rkey":"3llrdsglqdc2s",
+            "cid": "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy",
+            "record": {"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}
+        }"#;
+        let commit: CommitEvent = serde_json::from_str(json)?;
+        assert_eq!(
+            commit.cid.unwrap(),
+            "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()?
+        );
+        assert_eq!(
+            commit.record.unwrap().get(),
+            r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"#
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_parse_whole_event() -> anyhow::Result<()> {
+        let json = r#"{"did":"did:plc:ai3dzf35cth7s3st7n7jsd7r","time_us":1743526687419798,"kind":"commit","commit":{"rev":"3llrdsginou2i","operation":"create","collection":"app.bsky.feed.post","rkey":"3llrdsglqdc2s","record":{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"},"cid":"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"}}"#;
+        let event: JetstreamEvent = serde_json::from_str(json)?;
+        assert_eq!(event.kind, EventKind::Commit);
+        assert!(event.commit.is_some());
+        let commit = event.commit.unwrap();
+        assert_eq!(
+            commit.cid.unwrap(),
+            "bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy".parse()?
+        );
+        assert_eq!(
+            commit.record.unwrap().get(),
+            r#"{"$type":"app.bsky.feed.post","createdAt":"2025-04-01T16:58:06.154Z","langs":["en"],"text":"I wish apirl 1st would stop existing lol"}"#
+        );
+        Ok(())
+    }
+}
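
The new flat `events` module above also grows the `Cursor` API. A minimal sketch of how those methods compose (not part of this diff; it assumes `Cursor` is reachable as `jetstream::events::Cursor`):

```rust
use std::time::{Duration, SystemTime};

use jetstream::events::Cursor;

fn main() {
    // rewind ~1 hour into jetstream replay
    let cursor = Cursor::back_by(Duration::from_secs(60 * 60));

    // cursors round-trip through their raw u64 (jetstream's `time_us`),
    // which is what a consumer would persist to resume later
    let raw = cursor.to_raw_u64();
    assert_eq!(Cursor::from_raw_u64(raw).to_raw_u64(), raw);

    // they are ~microsecond unix timestamps, so they convert to SystemTime
    // and can report their age against the local clock
    let t: SystemTime = (&cursor).into();
    println!("cursor {} ({t:?}) is {:?} old", cursor.to_jetstream(), cursor.elapsed());

    // `next()` is the smallest strictly-later cursor, e.g. to avoid
    // re-receiving the last-processed event after a reconnect
    let _resume_from = cursor.next();
}
```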
-40
jetstream/src/events/account.rs
···
-use chrono::Utc;
-use serde::Deserialize;
-
-use crate::{
-    events::EventInfo,
-    exports,
-};
-
-/// An event representing a change to an account.
-#[derive(Deserialize, Debug)]
-pub struct AccountEvent {
-    /// Basic metadata included with every event.
-    #[serde(flatten)]
-    pub info: EventInfo,
-    /// Account specific data bundled with this event.
-    pub account: AccountData,
-}
-
-/// Account specific data bundled with an account event.
-#[derive(Deserialize, Debug)]
-pub struct AccountData {
-    /// Whether the account is currently active.
-    pub active: bool,
-    /// The DID of the account.
-    pub did: exports::Did,
-    pub seq: u64,
-    pub time: chrono::DateTime<Utc>,
-    /// If `active` is `false` this will be present to explain why the account is inactive.
-    pub status: Option<AccountStatus>,
-}
-
-/// The possible reasons an account might be listed as inactive.
-#[derive(Deserialize, Debug)]
-#[serde(rename_all = "lowercase")]
-pub enum AccountStatus {
-    Deactivated,
-    Deleted,
-    Suspended,
-    TakenDown,
-}
-55
jetstream/src/events/commit.rs
···
-use serde::Deserialize;
-
-use crate::{
-    events::EventInfo,
-    exports,
-};
-
-/// An event representing a repo commit, which can be a `create`, `update`, or `delete` operation.
-#[derive(Deserialize, Debug)]
-#[serde(untagged, rename_all = "snake_case")]
-pub enum CommitEvent<R> {
-    CreateOrUpdate {
-        #[serde(flatten)]
-        info: EventInfo,
-        commit: CommitData<R>,
-    },
-    Delete {
-        #[serde(flatten)]
-        info: EventInfo,
-        commit: CommitInfo,
-    },
-}
-
-/// The type of commit operation that was performed.
-#[derive(Deserialize, Debug, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum CommitType {
-    Create,
-    Update,
-    Delete,
-}
-
-/// Basic commit specific info bundled with every event, also the only data included with a `delete`
-/// operation.
-#[derive(Deserialize, Debug)]
-pub struct CommitInfo {
-    /// The type of commit operation that was performed.
-    pub operation: CommitType,
-    pub rev: String,
-    pub rkey: exports::RecordKey,
-    /// The NSID of the record type that this commit is associated with.
-    pub collection: exports::Nsid,
-}
-
-/// Detailed data bundled with a commit event. This data is only included when the event is
-/// `create` or `update`.
-#[derive(Deserialize, Debug)]
-pub struct CommitData<R> {
-    #[serde(flatten)]
-    pub info: CommitInfo,
-    /// The CID of the record that was operated on.
-    pub cid: exports::Cid,
-    /// The record that was operated on.
-    pub record: R,
-}
-28
jetstream/src/events/identity.rs
···
-use chrono::Utc;
-use serde::Deserialize;
-
-use crate::{
-    events::EventInfo,
-    exports,
-};
-
-/// An event representing a change to an identity.
-#[derive(Deserialize, Debug)]
-pub struct IdentityEvent {
-    /// Basic metadata included with every event.
-    #[serde(flatten)]
-    pub info: EventInfo,
-    /// Identity specific data bundled with this event.
-    pub identity: IdentityData,
-}
-
-/// Identity specific data bundled with an identity event.
-#[derive(Deserialize, Debug)]
-pub struct IdentityData {
-    /// The DID of the identity.
-    pub did: exports::Did,
-    /// The handle associated with the identity.
-    pub handle: Option<exports::Handle>,
-    pub seq: u64,
-    pub time: chrono::DateTime<Utc>,
-}
-138
jetstream/src/events/mod.rs
···
-pub mod account;
-pub mod commit;
-pub mod identity;
-
-use std::time::{
-    Duration,
-    SystemTime,
-    SystemTimeError,
-    UNIX_EPOCH,
-};
-
-use serde::Deserialize;
-
-use crate::exports;
-
-/// Opaque wrapper for the time_us cursor used by jetstream
-///
-/// Generally, you should use a cursor
-#[derive(Deserialize, Debug, Clone, PartialEq, PartialOrd)]
-pub struct Cursor(u64);
-
-/// Basic data that is included with every event.
-#[derive(Deserialize, Debug)]
-pub struct EventInfo {
-    pub did: exports::Did,
-    pub time_us: Cursor,
-    pub kind: EventKind,
-}
-
-#[derive(Deserialize, Debug)]
-#[serde(untagged)]
-pub enum JetstreamEvent<R> {
-    Commit(commit::CommitEvent<R>),
-    Identity(identity::IdentityEvent),
-    Account(account::AccountEvent),
-}
-
-#[derive(Deserialize, Debug)]
-#[serde(rename_all = "snake_case")]
-pub enum EventKind {
-    Commit,
-    Identity,
-    Account,
-}
-
-impl<R> JetstreamEvent<R> {
-    pub fn cursor(&self) -> Cursor {
-        match self {
-            JetstreamEvent::Commit(commit::CommitEvent::CreateOrUpdate { info, .. }) => {
-                info.time_us.clone()
-            }
-            JetstreamEvent::Commit(commit::CommitEvent::Delete { info, .. }) => {
-                info.time_us.clone()
-            }
-            JetstreamEvent::Identity(e) => e.info.time_us.clone(),
-            JetstreamEvent::Account(e) => e.info.time_us.clone(),
-        }
-    }
-}
-
-impl Cursor {
-    /// Get a cursor that will consume all available jetstream replay
-    ///
-    /// This sets the cursor to zero.
-    ///
-    /// Jetstream instances typically only have a few days of replay.
-    pub fn from_start() -> Self {
-        Self(0)
-    }
-    /// Get a cursor for a specific time
-    ///
-    /// Panics: if t is older than the unix epoch: Jan 1, 1970.
-    ///
-    /// If you want to receive all available jetstream replay (typically a few days), use
-    /// .from_start()
-    ///
-    /// Warning: this exploits the internal implementation detail of jetstream cursors
-    /// being ~microsecond timestamps.
-    pub fn at(t: SystemTime) -> Self {
-        let unix_dt = t
-            .duration_since(UNIX_EPOCH)
-            .expect("cannot set jetstream cursor earlier than unix epoch");
-        Self(unix_dt.as_micros() as u64)
-    }
-    /// Get a cursor rewound from now by this amount
-    ///
-    /// Panics: if d is greater than the time since the unix epoch: Jan 1, 1970.
-    ///
-    /// Jetstream instances typically only have a few days of replay.
-    ///
-    /// Warning: this exploits the internal implementation detail of jetstream cursors
-    /// being ~microsecond timestamps.
-    pub fn back_by(d: Duration) -> Self {
-        Self::at(SystemTime::now() - d)
-    }
-    /// Get a Cursor from a raw u64
-    ///
-    /// For example, from a jetstream event's `time_us` field.
-    pub fn from_raw_u64(time_us: u64) -> Self {
-        Self(time_us)
-    }
-    /// Get the raw u64 value from this cursor.
-    pub fn to_raw_u64(&self) -> u64 {
-        self.0
-    }
-    /// Format the cursor value for use in a jetstream connection url querystring
-    pub fn to_jetstream(&self) -> String {
-        self.0.to_string()
-    }
-    /// Compute the time span since an earlier cursor or [SystemTime]
-    ///
-    /// Warning: this exploits the internal implementation detail of jetstream cursors
-    /// being ~microsecond timestamps.
-    pub fn duration_since(
-        &self,
-        earlier: impl Into<SystemTime>,
-    ) -> Result<Duration, SystemTimeError> {
-        let t: SystemTime = self.into();
-        t.duration_since(earlier.into())
-    }
-    /// Compute the age of the cursor vs the local clock
-    ///
-    /// Warning: this exploits the internal implementation detail of jetstream cursors
-    pub fn elapsed(&self) -> Result<Duration, SystemTimeError> {
-        let t: SystemTime = self.into();
-        t.elapsed()
-    }
-}
-
-impl From<&Cursor> for SystemTime {
-    /// Convert a cursor directly to a [SystemTime]
-    ///
-    /// Warning: this exploits the internal implementation detail of jetstream cursors
-    /// being ~microsecond timestamps.
-    fn from(c: &Cursor) -> Self {
-        UNIX_EPOCH + Duration::from_micros(c.0)
-    }
-}
+21 -40
jetstream/src/lib.rs
···
pub mod exports;
use std::{
-    io::{
-        Cursor as IoCursor,
-        Read,
-    },
-    marker::PhantomData,
+    io::Cursor as IoCursor,
time::{
Duration,
Instant,
},
};
-use atrium_api::record::KnownRecord;
use futures_util::{
stream::StreamExt,
SinkExt,
};
-use serde::de::DeserializeOwned;
use tokio::{
net::TcpStream,
sync::mpsc::{
···
const JETSTREAM_ZSTD_DICTIONARY: &[u8] = include_bytes!("../zstd/dictionary");
/// A receiver channel for consuming Jetstream events.
-pub type JetstreamReceiver<R> = Receiver<JetstreamEvent<R>>;
+pub type JetstreamReceiver = Receiver<JetstreamEvent>;
/// An internal sender channel for sending Jetstream events to [JetstreamReceiver]'s.
-type JetstreamSender<R> = Sender<JetstreamEvent<R>>;
+type JetstreamSender = Sender<JetstreamEvent>;
/// A wrapper connector type for working with a WebSocket connection to a Jetstream instance to
/// receive and consume events. See [JetstreamConnector::connect] for more info.
-pub struct JetstreamConnector<R: DeserializeOwned> {
+pub struct JetstreamConnector {
/// The configuration for the Jetstream connection.
-    config: JetstreamConfig<R>,
+    config: JetstreamConfig,
}
pub enum JetstreamCompression {
···
}
}
-pub struct JetstreamConfig<R: DeserializeOwned = KnownRecord> {
+pub struct JetstreamConfig {
/// A Jetstream endpoint to connect to with a WebSocket Scheme i.e.
/// `wss://jetstream1.us-east.bsky.network/subscribe`.
pub endpoint: String,
···
/// can help prevent that if your consumer sometimes pauses, at a cost of higher memory
/// usage while events are buffered.
pub channel_size: usize,
-    /// Marker for record deserializable type.
-    ///
-    /// See examples/arbitrary_record.rs for an example using serde_json::Value
-    ///
-    /// You can omit this if you construct `JetstreamConfig { a: b, ..Default::default() }.
-    /// If you have to specify it, use `std::marker::PhantomData` with no type parameters.
-    pub record_type: PhantomData<R>,
}
-impl<R: DeserializeOwned> Default for JetstreamConfig<R> {
+impl Default for JetstreamConfig {
fn default() -> Self {
JetstreamConfig {
endpoint: DefaultJetstreamEndpoints::USEastOne.into(),
···
omit_user_agent_jetstream_info: false,
replay_on_reconnect: false,
channel_size: 4096, // a few seconds of firehose buffer
-            record_type: PhantomData,
}
}
}
-impl<R: DeserializeOwned> JetstreamConfig<R> {
+impl JetstreamConfig {
/// Constructs a new endpoint URL with the given [JetstreamConfig] applied.
pub fn get_request_builder(
&self,
···
}
}
-impl<R: DeserializeOwned + Send + 'static> JetstreamConnector<R> {
+impl JetstreamConnector {
/// Create a Jetstream connector with a valid [JetstreamConfig].
///
/// After creation, you can call [connect] to connect to the provided Jetstream instance.
-    pub fn new(config: JetstreamConfig<R>) -> Result<Self, ConfigValidationError> {
+    pub fn new(config: JetstreamConfig) -> Result<Self, ConfigValidationError> {
// We validate the configuration here so any issues are caught early.
config.validate()?;
Ok(JetstreamConnector { config })
···
///
/// A [JetstreamReceiver] is returned which can be used to respond to events. When all instances
/// of this receiver are dropped, the connection and task are automatically closed.
-    pub async fn connect(&self) -> Result<JetstreamReceiver<R>, ConnectionError> {
+    pub async fn connect(&self) -> Result<JetstreamReceiver, ConnectionError> {
self.connect_cursor(None).await
}
···
pub async fn connect_cursor(
&self,
cursor: Option<Cursor>,
-    ) -> Result<JetstreamReceiver<R>, ConnectionError> {
+    ) -> Result<JetstreamReceiver, ConnectionError> {
// We validate the config again for good measure. Probably not necessary but it can't hurt.
self.config
.validate()
···
loop {
let dict = DecoderDictionary::copy(JETSTREAM_ZSTD_DICTIONARY);
-        let req = match build_request(connect_cursor.clone()) {
+        let req = match build_request(connect_cursor) {
Ok(req) => req,
Err(e) => {
log::error!("Could not build jetstream websocket request: {e:?}");
···
}
};
-        let mut last_cursor = connect_cursor.clone();
+        let mut last_cursor = connect_cursor;
retry_attempt += 1;
if let Ok((ws_stream, _)) = connect_async(req).await {
let t_connected = Instant::now();
···
/// The main task that handles the WebSocket connection and sends [JetstreamEvent]'s to any
/// receivers that are listening for them.
-async fn websocket_task<R: DeserializeOwned>(
+async fn websocket_task(
dictionary: DecoderDictionary<'_>,
ws: WebSocketStream<MaybeTlsStream<TcpStream>>,
-    send_channel: JetstreamSender<R>,
+    send_channel: JetstreamSender,
last_cursor: &mut Option<Cursor>,
) -> Result<(), JetstreamEventError> {
// TODO: Use the write half to allow the user to change configuration settings on the fly.
···
Some(Ok(message)) => {
match message {
Message::Text(json) => {
-                    let event: JetstreamEvent<R> = serde_json::from_str(&json)
+                    let event: JetstreamEvent = serde_json::from_str(&json)
.map_err(JetstreamEventError::ReceivedMalformedJSON)?;
-                    let event_cursor = event.cursor();
+                    let event_cursor = event.cursor;
if let Some(last) = last_cursor {
if event_cursor <= *last {
···
}
Message::Binary(zstd_json) => {
let mut cursor = IoCursor::new(zstd_json);
-                    let mut decoder = zstd::stream::Decoder::with_prepared_dictionary(
+                    let decoder = zstd::stream::Decoder::with_prepared_dictionary(
&mut cursor,
&dictionary,
)
.map_err(JetstreamEventError::CompressionDictionaryError)?;
-                    let mut json = String::new();
-                    decoder
-                        .read_to_string(&mut json)
-                        .map_err(JetstreamEventError::CompressionDecoderError)?;
-
-                    let event: JetstreamEvent<R> = serde_json::from_str(&json)
+                    let event: JetstreamEvent = serde_json::from_reader(decoder)
.map_err(JetstreamEventError::ReceivedMalformedJSON)?;
-                    let event_cursor = event.cursor();
+                    let event_cursor = event.cursor;
if let Some(last) = last_cursor {
if event_cursor <= *last {
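
With the `R` type parameter gone from `JetstreamConfig`/`JetstreamConnector`, wiring up a consumer is plainer. An end-to-end sketch (not from this PR) using only items visible in this diff, assuming `tokio` and `anyhow` on the caller's side:

```rust
use std::time::Duration;

use jetstream::{
    events::{Cursor, EventKind},
    DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector,
};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // no generics and no `record_type: PhantomData` field anymore
    let config = JetstreamConfig {
        endpoint: DefaultJetstreamEndpoints::USEastOne.into(),
        compression: JetstreamCompression::Zstd,
        replay_on_reconnect: true,
        ..Default::default()
    };

    // resume ~5 minutes back instead of starting at the live tail
    let cursor = Cursor::back_by(Duration::from_secs(300));
    let mut receiver = JetstreamConnector::new(config)?
        .connect_cursor(Some(cursor))
        .await?;

    while let Some(event) = receiver.recv().await {
        // records now arrive as raw JSON (`Option<Box<RawValue>>`) on the
        // flat event struct; deserialize only the ones you care about
        if event.kind == EventKind::Commit {
            if let Some(commit) = event.commit {
                println!("{:?} in {}", commit.operation, commit.collection.as_str());
            }
        }
    }
    Ok(())
}
```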
+6 -6
ufos/Cargo.toml
···
[dependencies]
anyhow = "1.0.97"
+async-trait = "0.1.88"
bincode = { version = "2.0.1", features = ["serde"] }
+cardinality-estimator = { version = "1.0.2", features = ["with_serde"] }
clap = { version = "4.5.31", features = ["derive"] }
dropshot = "0.16.0"
env_logger = "0.11.7"
+fjall = { version = "2.8.0", features = ["lz4"] }
jetstream = { path = "../jetstream" }
log = "0.4.26"
lsm-tree = "2.6.6"
-schemars = "0.8.22"
+schemars = { version = "0.8.22", features = ["raw_value"] }
semver = "1.0.26"
serde = "1.0.219"
serde_json = "1.0.140"
···
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.6.0"
-[dependencies.fjall]
-git = "https://github.com/fjall-rs/fjall.git"
-branch = "fix/lockless-ranges"
-features = ["bytes", "single_writer_tx"]
-default-features = false
+[dev-dependencies]
+tempfile = "3.19.1"
+4
ufos/fuzz/.gitignore
···
+target
+corpus
+artifacts
+coverage
+39
ufos/fuzz/Cargo.toml
···
+[package]
+name = "ufos-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+ufos = { path = ".." }
+jetstream = { path = "../../jetstream" }
+bincode = { version = "2.0.1", features = ["serde"] }
+cardinality-estimator = { version = "1.0.2", features = ["with_serde"] }
+
+[target.'cfg(not(target_env = "msvc"))'.dependencies]
+tikv-jemallocator = "0.6.0"
+
+[[bin]]
+name = "counts_value"
+path = "fuzz_targets/counts_value.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "estimated_dids_value"
+path = "fuzz_targets/estimated_dids_value.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "cardinality_estimator"
+path = "fuzz_targets/cardinality_estimator.rs"
+test = false
+doc = false
+bench = false
+20
ufos/fuzz/fuzz_targets/cardinality_estimator.rs
···
+#![no_main]
+
+use bincode::config::{Configuration, LittleEndian, Varint, Limit, standard};
+use bincode::serde::decode_from_slice;
+use cardinality_estimator::CardinalityEstimator;
+use libfuzzer_sys::fuzz_target;
+
+type C = Configuration<LittleEndian, Varint, Limit<1048576>>;
+static BINCODE_CONF: C = standard().with_limit::<1048576>();
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok((mut estimator, _n)) = decode_from_slice::<CardinalityEstimator<String>, C>(
+        data,
+        BINCODE_CONF,
+    ) {
+        // crash happens *much* faster if we just do kinda anything with the estimator
+        estimator.insert(&"asdf".to_string());
+        assert!(estimator.estimate() > 0);
+    }
+});
+25
ufos/fuzz/fuzz_targets/counts_value.rs
···
+#![no_main]
+
+// use jetstream::exports::Did;
+use ufos::db_types::DbBytes;
+use ufos::store_types::CountsValue;
+use libfuzzer_sys::fuzz_target;
+
+#[cfg(not(target_env = "msvc"))]
+use tikv_jemallocator::Jemalloc;
+
+#[cfg(not(target_env = "msvc"))]
+#[global_allocator]
+static GLOBAL: Jemalloc = Jemalloc;
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok((counts_value, n)) = CountsValue::from_db_bytes(data) {
+        assert!(n <= data.len());
+        let serialized = counts_value.to_db_bytes().unwrap();
+        assert_eq!(serialized.len(), n);
+        let (and_back, n_again) = CountsValue::from_db_bytes(&serialized).unwrap();
+        assert_eq!(n_again, n);
+        assert_eq!(and_back.records(), counts_value.records());
+        assert_eq!(and_back.dids().estimate(), counts_value.dids().estimate());
+    }
+});
+24
ufos/fuzz/fuzz_targets/estimated_dids_value.rs
···
+#![no_main]
+
+// use jetstream::exports::Did;
+use ufos::db_types::DbBytes;
+use ufos::store_types::EstimatedDidsValue;
+use libfuzzer_sys::fuzz_target;
+
+#[cfg(not(target_env = "msvc"))]
+use tikv_jemallocator::Jemalloc;
+
+#[cfg(not(target_env = "msvc"))]
+#[global_allocator]
+static GLOBAL: Jemalloc = Jemalloc;
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok((counts_value, n)) = EstimatedDidsValue::from_db_bytes(data) {
+        assert!(n <= data.len());
+        let serialized = counts_value.to_db_bytes().unwrap();
+        assert_eq!(serialized.len(), n);
+        let (and_back, n_again) = EstimatedDidsValue::from_db_bytes(&serialized).unwrap();
+        assert_eq!(n_again, n);
+        assert_eq!(and_back.0.estimate(), counts_value.0.estimate());
+    }
+});
+15
ufos/readme.md
···
```bash
sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
```
+
+
+---
+
+## fuzzing
+
+got bit by https://github.com/cloudflare/cardinality-estimator/pull/12, so now we have a fuzz target.
+
+install cargo-fuzz and then
+
+```bash
+RUSTFLAGS="-Z sanitizer=address" cargo +nightly fuzz run cardinality_estimator
+```
+
+to fuzz the counts value things
+92 -140
ufos/src/consumer.rs
···
use jetstream::{
-    events::{
-        account::AccountEvent,
-        commit::{CommitData, CommitEvent, CommitInfo, CommitType},
-        Cursor, EventInfo, JetstreamEvent,
-    },
-    exports::Did,
+    events::{Cursor, EventKind, JetstreamEvent},
+    exports::{Did, Nsid},
DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector,
JetstreamReceiver,
};
···
use std::time::Duration;
use tokio::sync::mpsc::{channel, Receiver, Sender};
-use crate::{CreateRecord, DeleteAccount, DeleteRecord, EventBatch, ModifyRecord, UpdateRecord};
+use crate::error::{BatchInsertError, FirehoseEventError};
+use crate::{DeleteAccount, EventBatch, UFOsCommit};
-const MAX_BATCHED_RECORDS: usize = 128; // *non-blocking* limit. drops oldest batched record per collection once reached.
-const MAX_BATCHED_MODIFIES: usize = 512; // hard limit, total updates and deletes across all collections.
-const MAX_ACCOUNT_REMOVES: usize = 512; // hard limit, total account deletions. actually the least frequent event, but tiny.
-const MAX_BATCHED_COLLECTIONS: usize = 64; // hard limit, MAX_BATCHED_RECORDS applies per collection
-const MIN_BATCH_SPAN_SECS: f64 = 2.; // try to get a bit of rest a bit.
-const MAX_BATCH_SPAN_SECS: f64 = 60.; // hard limit of duration from oldest to latest event cursor within a batch, in seconds.
+pub const MAX_BATCHED_RECORDS: usize = 128; // *non-blocking* limit. drops oldest batched record per collection once reached.
+pub const MAX_ACCOUNT_REMOVES: usize = 1024; // hard limit, extremely unlikely to reach, but just in case
+pub const MAX_BATCHED_COLLECTIONS: usize = 64; // hard limit, MAX_BATCHED_RECORDS applies per-collection
+pub const MIN_BATCH_SPAN_SECS: f64 = 2.; // breathe
+pub const MAX_BATCH_SPAN_SECS: f64 = 60.; // hard limit, pause consumer if we're unable to send by now
+pub const SEND_TIMEOUT_S: f64 = 15.; // if the channel is blocked longer than this, something is probably up
+pub const BATCH_QUEUE_SIZE: usize = 1; // nearly-rendez-vous
-const SEND_TIMEOUT_S: f64 = 60.;
-const BATCH_QUEUE_SIZE: usize = 512; // 4096 got OOM'd. update: 1024 also got OOM'd during L0 compaction blocking
+pub type LimitedBatch = EventBatch<MAX_BATCHED_RECORDS>;
+
+#[derive(Debug, Default)]
+struct CurrentBatch {
+    initial_cursor: Option<Cursor>,
+    batch: LimitedBatch,
+}
#[derive(Debug)]
-struct Batcher {
-    jetstream_receiver: JetstreamReceiver<serde_json::Value>,
-    batch_sender: Sender<EventBatch>,
-    current_batch: EventBatch,
+pub struct Batcher {
+    jetstream_receiver: JetstreamReceiver,
+    batch_sender: Sender<LimitedBatch>,
+    current_batch: CurrentBatch,
}
pub async fn consume(
jetstream_endpoint: &str,
cursor: Option<Cursor>,
no_compress: bool,
-) -> anyhow::Result<Receiver<EventBatch>> {
+) -> anyhow::Result<Receiver<LimitedBatch>> {
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(jetstream_endpoint);
if endpoint == jetstream_endpoint {
-        eprintln!("connecting to jetstream at {endpoint}");
+        log::info!("connecting to jetstream at {endpoint}");
} else {
-        eprintln!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
+        log::info!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
}
-    let config: JetstreamConfig<serde_json::Value> = JetstreamConfig {
+    let config: JetstreamConfig = JetstreamConfig {
endpoint,
compression: if no_compress {
JetstreamCompression::None
} else {
JetstreamCompression::Zstd
},
-        channel_size: 64, // small because we'd rather buffer events into batches
+        replay_on_reconnect: true,
+        channel_size: 1024, // buffer up to ~1s of jetstream events
..Default::default()
};
let jetstream_receiver = JetstreamConnector::new(config)?
.connect_cursor(cursor)
.await?;
-    let (batch_sender, batch_reciever) = channel::<EventBatch>(BATCH_QUEUE_SIZE);
+    let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE);
let mut batcher = Batcher::new(jetstream_receiver, batch_sender);
tokio::task::spawn(async move { batcher.run().await });
Ok(batch_reciever)
}
impl Batcher {
-    fn new(
-        jetstream_receiver: JetstreamReceiver<serde_json::Value>,
-        batch_sender: Sender<EventBatch>,
-    ) -> Self {
+    pub fn new(jetstream_receiver: JetstreamReceiver, batch_sender: Sender<LimitedBatch>) -> Self {
Self {
jetstream_receiver,
batch_sender,
···
}
}
-    async fn run(&mut self) -> anyhow::Result<()> {
+    pub async fn run(&mut self) -> anyhow::Result<()> {
loop {
if let Some(event) = self.jetstream_receiver.recv().await {
self.handle_event(event).await?
···
}
}
-    async fn handle_event(
-        &mut self,
-        event: JetstreamEvent<serde_json::Value>,
-    ) -> anyhow::Result<()> {
-        let event_cursor = event.cursor();
-
-        if let Some(earliest) = &self.current_batch.first_jetstream_cursor {
-            if event_cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS)
+    async fn handle_event(&mut self, event: JetstreamEvent) -> anyhow::Result<()> {
+        if let Some(earliest) = &self.current_batch.initial_cursor {
+            if event.cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS)
{
-                self.send_current_batch_now().await?;
+                self.send_current_batch_now(false).await?;
}
} else {
-            self.current_batch.first_jetstream_cursor = Some(event_cursor.clone());
+            self.current_batch.initial_cursor = Some(event.cursor);
}
-        match event {
-            JetstreamEvent::Commit(CommitEvent::CreateOrUpdate { commit, info }) => {
-                match commit.info.operation {
-                    CommitType::Create => self.handle_create_record(commit, info).await?,
-                    CommitType::Update => {
-                        self.handle_modify_record(modify_update(commit, info))
-                            .await?
-                    }
-                    CommitType::Delete => {
-                        panic!("jetstream Commit::CreateOrUpdate had Delete operation type")
-                    }
+        match event.kind {
+            EventKind::Commit => {
+                let commit = event
+                    .commit
+                    .ok_or(FirehoseEventError::CommitEventMissingCommit)?;
+                let (commit, nsid) = UFOsCommit::from_commit_info(commit, event.did, event.cursor)?;
+                self.handle_commit(commit, nsid).await?;
+            }
+            EventKind::Account => {
+                let account = event
+                    .account
+                    .ok_or(FirehoseEventError::AccountEventMissingAccount)?;
+                if !account.active {
+                    self.handle_delete_account(event.did, event.cursor).await?;
                }
            }
-            JetstreamEvent::Commit(CommitEvent::Delete { commit, info }) => {
-                self.handle_modify_record(modify_delete(commit, info))
-                    .await?
-            }
-            JetstreamEvent::Account(AccountEvent { info, account }) if !account.active => {
-                self.handle_remove_account(info.did, info.time_us).await?
-            }
-            JetstreamEvent::Account(_) => {} // ignore account *activations*
-            JetstreamEvent::Identity(_) => {} // identity events are noops for us
-        };
-        self.current_batch.last_jetstream_cursor = Some(event_cursor.clone());
+            _ => {}
+        }
// if the queue is empty and we have enough, send immediately. otherewise, let the current batch fill up.
-        if let Some(earliest) = &self.current_batch.first_jetstream_cursor {
-            if event_cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS
+        if let Some(earliest) = &self.current_batch.initial_cursor {
+            if event.cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS
&& self.batch_sender.capacity() == BATCH_QUEUE_SIZE
{
-                log::trace!("queue empty: immediately sending batch.");
-                if let Err(send_err) = self
-                    .batch_sender
-                    .send(mem::take(&mut self.current_batch))
-                    .await
-                {
-                    anyhow::bail!("Could not send batch, likely because the receiver closed or dropped: {send_err:?}");
-                }
+                self.send_current_batch_now(true).await?;
}
}
Ok(())
}
-    // holds up all consumer progress until it can send to the channel
-    // use this when the current batch is too full to add more to it
-    async fn send_current_batch_now(&mut self) -> anyhow::Result<()> {
-        log::warn!(
-            "attempting to send batch now (capacity: {})",
-            self.batch_sender.capacity()
+    async fn handle_commit(&mut self, commit: UFOsCommit, collection: Nsid) -> anyhow::Result<()> {
+        let optimistic_res = self.current_batch.batch.insert_commit_by_nsid(
+            &collection,
+            commit,
+            MAX_BATCHED_COLLECTIONS,
);
-
self.batch_sender
-
.send_timeout(
-
mem::take(&mut self.current_batch),
-
Duration::from_secs_f64(SEND_TIMEOUT_S),
-
)
-
.await?;
-
Ok(())
-
}
-
async fn handle_create_record(
-
&mut self,
-
commit: CommitData<serde_json::Value>,
-
info: EventInfo,
-
) -> anyhow::Result<()> {
-
if !self
-
.current_batch
-
.record_creates
-
.contains_key(&commit.info.collection)
-
&& self.current_batch.record_creates.len() >= MAX_BATCHED_COLLECTIONS
-
{
-
self.send_current_batch_now().await?;
+
if let Err(BatchInsertError::BatchFull(commit)) = optimistic_res {
+
self.send_current_batch_now(false).await?;
+
self.current_batch.batch.insert_commit_by_nsid(
+
&collection,
+
commit,
+
MAX_BATCHED_COLLECTIONS,
+
)?;
+
} else {
+
optimistic_res?;
}
-
let record = CreateRecord {
-
did: info.did,
-
rkey: commit.info.rkey,
-
record: commit.record,
-
cursor: info.time_us,
-
};
-
let collection = self
-
.current_batch
-
.record_creates
-
.entry(commit.info.collection)
-
.or_default();
-
collection.total_seen += 1;
-
collection.samples.push_front(record);
-
collection.samples.truncate(MAX_BATCHED_RECORDS);
-
Ok(())
-
}
-
async fn handle_modify_record(&mut self, modify_record: ModifyRecord) -> anyhow::Result<()> {
-
if self.current_batch.record_modifies.len() >= MAX_BATCHED_MODIFIES {
-
self.send_current_batch_now().await?;
-
}
-
self.current_batch.record_modifies.push(modify_record);
Ok(())
}
-
async fn handle_remove_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> {
-
if self.current_batch.account_removes.len() >= MAX_ACCOUNT_REMOVES {
-
self.send_current_batch_now().await?;
+
async fn handle_delete_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> {
+
if self.current_batch.batch.account_removes.len() >= MAX_ACCOUNT_REMOVES {
+
self.send_current_batch_now(false).await?;
}
self.current_batch
+
.batch
.account_removes
.push(DeleteAccount { did, cursor });
Ok(())
}
-
}
-
fn modify_update(commit: CommitData<serde_json::Value>, info: EventInfo) -> ModifyRecord {
-
ModifyRecord::Update(UpdateRecord {
-
did: info.did,
-
collection: commit.info.collection,
-
rkey: commit.info.rkey,
-
record: commit.record,
-
cursor: info.time_us,
-
})
-
}
-
-
fn modify_delete(commit_info: CommitInfo, info: EventInfo) -> ModifyRecord {
-
ModifyRecord::Delete(DeleteRecord {
-
did: info.did,
-
collection: commit_info.collection,
-
rkey: commit_info.rkey,
-
cursor: info.time_us,
-
})
+
// holds up all consumer progress until it can send to the channel
+
// use this when the current batch is too full to add more to it
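+
// (`small` is true for an early send of a not-yet-full batch while the queue is idle)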
+
async fn send_current_batch_now(&mut self, small: bool) -> anyhow::Result<()> {
+
let beginning = match self.current_batch.initial_cursor.map(|c| c.elapsed()) {
+
None => "unknown".to_string(),
+
Some(Ok(t)) => format!("{:?}", t),
+
Some(Err(e)) => format!("+{:?}", e.duration()),
+
};
+
log::info!(
+
"sending batch now from {beginning}, {}, queue capacity: {}",
+
if small { "small" } else { "full" },
+
self.batch_sender.capacity(),
+
);
+
let current = mem::take(&mut self.current_batch);
+
self.batch_sender
+
.send_timeout(current.batch, Duration::from_secs_f64(SEND_TIMEOUT_S))
+
.await?;
+
Ok(())
+
}
}
+51 -5
ufos/src/db_types.rs
···
UnterminatedString,
#[error("could not convert from utf8: {0}")]
NotUtf8(#[from] std::str::Utf8Error),
+
#[error("could not convert from utf8: {0}")]
+
NotUtf8String(#[from] std::string::FromUtf8Error),
#[error("could not get array from slice: {0}")]
BadSlice(#[from] std::array::TryFromSliceError),
#[error("wrong static prefix. expected {1:?}, found {0:?}")]
···
DecodeTooManyBytes(usize),
#[error("expected exclusive bound from lsm_tree (likely bug)")]
BadRangeBound,
+
#[error("expected a truncated u64 for mod {0}, found remainder: {1}")]
+
InvalidTruncated(u64, u64),
}
fn bincode_conf() -> impl Config {
-
standard().with_big_endian().with_fixed_int_encoding()
+
standard()
+
.with_big_endian()
+
.with_fixed_int_encoding()
+
.with_limit::<{ 2_usize.pow(20) }>() // 1MB
}
pub trait DbBytes {
···
pub fn to_prefix_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
self.prefix.to_db_bytes()
}
-
pub fn range_end(&self) -> Result<Vec<u8>, EncodingError> {
-
let prefix_bytes = self.prefix.to_db_bytes()?;
+
pub fn prefix_range_end(prefix: &P) -> Result<Vec<u8>, EncodingError> {
+
let prefix_bytes = prefix.to_db_bytes()?;
let (_, Bound::Excluded(range_end)) = prefix_to_range(&prefix_bytes) else {
return Err(EncodingError::BadRangeBound);
};
Ok(range_end.to_vec())
}
+
pub fn range_end(&self) -> Result<Vec<u8>, EncodingError> {
+
Self::prefix_range_end(&self.prefix)
+
}
pub fn range(&self) -> Result<Range<Vec<u8>>, EncodingError> {
let prefix_bytes = self.prefix.to_db_bytes()?;
let (Bound::Included(start), Bound::Excluded(end)) = prefix_to_range(&prefix_bytes) else {
···
Self: Sized,
{
let (prefix, eaten) = P::from_db_bytes(bytes)?;
+
assert!(
+
eaten <= bytes.len(),
+
"eaten({}) < len({})",
+
eaten,
+
bytes.len()
+
);
let Some(suffix_bytes) = bytes.get(eaten..) else {
return Err(EncodingError::DecodeMissingSuffix);
};
+
if suffix_bytes.is_empty() {
+
return Err(EncodingError::DecodeMissingSuffix);
+
};
let (suffix, also_eaten) = S::from_db_bytes(suffix_bytes)?;
+
assert!(
+
also_eaten <= suffix_bytes.len(),
+
"also eaten({}) < suffix len({})",
+
also_eaten,
+
suffix_bytes.len()
+
);
Ok((Self { prefix, suffix }, eaten + also_eaten))
}
}
···
impl<T> DbBytes for T
where
-
T: BincodeEncode + BincodeDecode<()> + UseBincodePlz + Sized,
+
T: BincodeEncode + BincodeDecode<()> + UseBincodePlz + Sized + std::fmt::Debug,
{
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
Ok(encode_to_vec(self, bincode_conf())?)
···
}
}
+
/// helper trait: impl on a type to get helpers to implement DbBytes
+
pub trait SerdeBytes: serde::Serialize + for<'a> serde::Deserialize<'a> {
+
fn to_bytes(&self) -> Result<Vec<u8>, EncodingError>
+
where
+
Self: std::fmt::Debug,
+
{
+
Ok(bincode::serde::encode_to_vec(self, bincode_conf())?)
+
}
+
fn from_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+
Ok(bincode::serde::decode_from_slice(bytes, bincode_conf())?)
+
}
+
}
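+
// A sketch of the intended wiring (the type name `MyCounts` is hypothetical):
+
// implement SerdeBytes on a serde type, then forward DbBytes to these helpers:
+
//
+
//     impl SerdeBytes for MyCounts {}
+
//     impl DbBytes for MyCounts {
+
//         fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> { self.to_bytes() }
+
//         fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> { Self::from_bytes(bytes) }
+
//     }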
+
//////
+
+
impl DbBytes for Vec<u8> {
+
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+
Ok(self.to_vec())
+
}
+
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+
Ok((bytes.to_owned(), bytes.len()))
+
}
+
}
/// Lexicographic-sort-friendly null-terminating serialization for String
///
···
(1234, "", "empty string"),
(789, "aaaaa", "string and cursor"),
] {
-
eprintln!("{desc}");
let original = TwoThings {
prefix: Cursor::from_raw_u64(tired_prefix),
suffix: sad_suffix.to_string(),
+41
ufos/src/error.rs
···
+
use crate::db_types::EncodingError;
+
use crate::UFOsCommit;
+
use thiserror::Error;
+
+
#[derive(Debug, Error)]
+
pub enum FirehoseEventError {
+
#[error("Create/Update commit operation missing record data")]
+
CruMissingRecord,
+
#[error("Account event missing account info")]
+
AccountEventMissingAccount,
+
#[error("Commit event missing commit info")]
+
CommitEventMissingCommit,
+
}
+
+
#[derive(Debug, Error)]
+
pub enum BatchInsertError {
+
#[error("Batch is full and no creates are left to be truncated")]
+
BatchFull(UFOsCommit),
+
#[error("Bug: tried to index beyond batch limit: {0}")]
+
BatchOverflow(usize),
+
#[error("Bug: non-terminating head advancement??")]
+
BatchForever,
+
}
+
+
#[derive(Debug, Error)]
+
pub enum StorageError {
+
#[error("Failed to initialize: {0}")]
+
InitError(String),
+
#[error("DB seems to be in a bad state: {0}")]
+
BadStateError(String),
+
#[error("Fjall error")]
+
FjallError(#[from] fjall::Error),
+
#[error("LSM-tree error (from fjall)")]
+
FjallLsmError(#[from] fjall::LsmError),
+
#[error("Bytes encoding error")]
+
EncodingError(#[from] EncodingError),
+
#[error("If you ever see this, there's a bug in the code. The error was stolen")]
+
Stolen,
+
#[error("Failed to join tokio task: {0}")]
+
JoinError(#[from] tokio::task::JoinError),
+
}
+32
ufos/src/file_consumer.rs
···
+
use crate::consumer::{Batcher, LimitedBatch, BATCH_QUEUE_SIZE};
+
use anyhow::Result;
+
use jetstream::{error::JetstreamEventError, events::JetstreamEvent};
+
use std::path::PathBuf;
+
use tokio::{
+
fs::File,
+
io::{AsyncBufReadExt, BufReader},
+
sync::mpsc::{channel, Receiver, Sender},
+
};
+
+
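/// Replays a JSONL fixture: each line is one JSON-serialized JetstreamEvent,
+
/// parsed and forwarded to the batcher as if it came from a live jetstream.
+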
async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>) -> Result<()> {
+
let mut lines = BufReader::new(f).lines();
+
while let Some(line) = lines.next_line().await? {
+
let event: JetstreamEvent =
+
serde_json::from_str(&line).map_err(JetstreamEventError::ReceivedMalformedJSON)?;
+
if sender.send(event).await.is_err() {
+
log::warn!("All receivers for the jsonl fixture have been dropped, bye.");
+
return Err(JetstreamEventError::ReceiverClosedError.into());
+
}
+
}
+
Ok(())
+
}
+
+
pub async fn consume(p: PathBuf) -> Result<Receiver<LimitedBatch>> {
+
let f = File::open(p).await?;
+
let (jsonl_sender, jsonl_receiver) = channel::<JetstreamEvent>(16);
+
let (batch_sender, batch_receiver) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE);
+
let mut batcher = Batcher::new(jsonl_receiver, batch_sender);
+
tokio::task::spawn(async move { read_jsonl(f, jsonl_sender).await });
+
tokio::task::spawn(async move { batcher.run().await });
+
Ok(batch_receiver)
+
}
+429 -33
ufos/src/lib.rs
···
pub mod consumer;
pub mod db_types;
+
pub mod error;
+
pub mod file_consumer;
pub mod server;
-
pub mod store;
+
pub mod storage;
+
pub mod storage_fjall;
+
pub mod storage_mem;
pub mod store_types;
-
use jetstream::events::Cursor;
+
use crate::error::BatchInsertError;
+
use cardinality_estimator::CardinalityEstimator;
+
use error::FirehoseEventError;
+
use jetstream::events::{CommitEvent, CommitOp, Cursor};
use jetstream::exports::{Did, Nsid, RecordKey};
-
use std::collections::{HashMap, VecDeque};
-
-
#[derive(Debug, Clone)]
-
pub struct CreateRecord {
-
pub did: Did,
-
// collection omitted because the batch keys off it
-
pub rkey: RecordKey,
-
pub record: serde_json::Value,
-
pub cursor: Cursor,
-
}
+
use schemars::JsonSchema;
+
use serde::Serialize;
+
use serde_json::value::RawValue;
+
use std::collections::HashMap;
#[derive(Debug, Default, Clone)]
-
pub struct CollectionSamples {
+
pub struct CollectionCommits<const LIMIT: usize> {
pub total_seen: usize,
-
pub samples: VecDeque<CreateRecord>,
+
pub dids_estimate: CardinalityEstimator<Did>,
+
pub commits: Vec<UFOsCommit>,
+
head: usize,
+
non_creates: usize,
+
}
+
+
impl<const LIMIT: usize> CollectionCommits<LIMIT> {
+
fn advance_head(&mut self) {
+
self.head += 1;
+
if self.head >= LIMIT {
+
self.head = 0;
+
}
+
}
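+
// Inserts a commit; once the buffer is full, evicts the oldest buffered
+
// *create* (scanning from `head`). Deletes and updates are never evicted,
+
// so a batch holding LIMIT non-creates rejects further inserts with BatchFull.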
+
pub fn truncating_insert(&mut self, commit: UFOsCommit) -> Result<(), BatchInsertError> {
+
if self.non_creates == LIMIT {
+
return Err(BatchInsertError::BatchFull(commit));
+
}
+
let did = commit.did.clone();
+
let is_create = commit.action.is_create();
+
if self.commits.len() < LIMIT {
+
self.commits.push(commit);
+
if self.commits.capacity() > LIMIT {
+
self.commits.shrink_to(LIMIT); // save mem?????? maybe??
+
}
+
} else {
+
let head_started_at = self.head;
+
loop {
+
let candidate = self
+
.commits
+
.get_mut(self.head)
+
.ok_or(BatchInsertError::BatchOverflow(self.head))?;
+
if candidate.action.is_create() {
+
*candidate = commit;
+
// advance past the slot we just filled so the next eviction targets the next-oldest create
+
self.advance_head();
+
break;
+
}
+
self.advance_head();
+
if self.head == head_started_at {
+
return Err(BatchInsertError::BatchForever);
+
}
+
}
+
}
+
+
if is_create {
+
self.total_seen += 1;
+
self.dids_estimate.insert(&did);
+
} else {
+
self.non_creates += 1;
+
}
+
+
Ok(())
+
}
}
#[derive(Debug, Clone)]
-
pub struct UpdateRecord {
+
pub struct DeleteAccount {
pub did: Did,
-
pub collection: Nsid,
-
pub rkey: RecordKey,
-
pub record: serde_json::Value,
pub cursor: Cursor,
}
#[derive(Debug, Clone)]
-
pub struct DeleteRecord {
-
pub did: Did,
-
pub collection: Nsid,
-
pub rkey: RecordKey,
-
pub cursor: Cursor,
+
pub enum CommitAction {
+
Put(PutAction),
+
Cut,
+
}
+
impl CommitAction {
+
pub fn is_create(&self) -> bool {
+
match self {
+
CommitAction::Put(PutAction { is_update, .. }) => !is_update,
+
CommitAction::Cut => false,
+
}
+
}
}
#[derive(Debug, Clone)]
-
pub enum ModifyRecord {
-
Update(UpdateRecord),
-
Delete(DeleteRecord),
+
pub struct PutAction {
+
record: Box<RawValue>,
+
is_update: bool,
}
#[derive(Debug, Clone)]
-
pub struct DeleteAccount {
+
pub struct UFOsCommit {
+
cursor: Cursor,
+
did: Did,
+
rkey: RecordKey,
+
rev: String,
+
action: CommitAction,
+
}
+
+
#[derive(Debug, Clone, Serialize)]
+
pub struct UFOsRecord {
+
pub cursor: Cursor,
pub did: Did,
-
pub cursor: Cursor,
+
pub collection: Nsid,
+
pub rkey: RecordKey,
+
pub rev: String,
+
// TODO: cid?
+
pub record: Box<RawValue>,
+
pub is_update: bool,
+
}
+
+
impl UFOsCommit {
+
pub fn from_commit_info(
+
commit: CommitEvent,
+
did: Did,
+
cursor: Cursor,
+
) -> Result<(Self, Nsid), FirehoseEventError> {
+
let action = match commit.operation {
+
CommitOp::Delete => CommitAction::Cut,
+
cru => CommitAction::Put(PutAction {
+
record: commit.record.ok_or(FirehoseEventError::CruMissingRecord)?,
+
is_update: cru == CommitOp::Update,
+
}),
+
};
+
let batched = Self {
+
cursor,
+
did,
+
rkey: commit.rkey,
+
rev: commit.rev,
+
action,
+
};
+
Ok((batched, commit.collection))
+
}
}
#[derive(Debug, Default, Clone)]
-
pub struct EventBatch {
-
pub record_creates: HashMap<Nsid, CollectionSamples>,
-
pub record_modifies: Vec<ModifyRecord>,
+
pub struct EventBatch<const LIMIT: usize> {
+
pub commits_by_nsid: HashMap<Nsid, CollectionCommits<LIMIT>>,
pub account_removes: Vec<DeleteAccount>,
-
pub first_jetstream_cursor: Option<Cursor>,
-
pub last_jetstream_cursor: Option<Cursor>,
+
}
+
+
impl<const LIMIT: usize> EventBatch<LIMIT> {
+
pub fn insert_commit_by_nsid(
+
&mut self,
+
collection: &Nsid,
+
commit: UFOsCommit,
+
max_collections: usize,
+
) -> Result<(), BatchInsertError> {
+
let map = &mut self.commits_by_nsid;
+
if !map.contains_key(collection) && map.len() >= max_collections {
+
return Err(BatchInsertError::BatchFull(commit));
+
}
+
map.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)?;
+
Ok(())
+
}
+
pub fn total_records(&self) -> usize {
+
self.commits_by_nsid.values().map(|v| v.commits.len()).sum()
+
}
+
pub fn total_seen(&self) -> usize {
+
self.commits_by_nsid.values().map(|v| v.total_seen).sum()
+
}
+
pub fn total_collections(&self) -> usize {
+
self.commits_by_nsid.len()
+
}
+
pub fn account_removes(&self) -> usize {
+
self.account_removes.len()
+
}
+
pub fn estimate_dids(&self) -> usize {
+
let mut estimator = CardinalityEstimator::<Did>::new();
+
for commits in self.commits_by_nsid.values() {
+
estimator.merge(&commits.dids_estimate);
+
}
+
estimator.estimate()
+
}
+
pub fn latest_cursor(&self) -> Option<Cursor> {
+
let mut latest = Cursor::from_start();
+
for commits in self.commits_by_nsid.values() {
+
for commit in &commits.commits {
+
if commit.cursor > latest {
+
latest = commit.cursor;
+
}
+
}
+
}
+
if let Some(del) = self.account_removes.last() {
+
if del.cursor > latest {
+
latest = del.cursor;
+
}
+
}
+
if latest > Cursor::from_start() {
+
Some(latest)
+
} else {
+
None
+
}
+
}
+
pub fn is_empty(&self) -> bool {
+
self.commits_by_nsid.is_empty() && self.account_removes.is_empty()
+
}
+
}
+
+
#[derive(Debug, Serialize, JsonSchema)]
+
pub enum ConsumerInfo {
+
Jetstream {
+
endpoint: String,
+
started_at: u64,
+
latest_cursor: Option<u64>,
+
},
+
}
+
+
#[derive(Debug, Default, PartialEq, Serialize, JsonSchema)]
+
pub struct TopCollections {
+
total_records: u64,
+
dids_estimate: u64,
+
nsid_child_segments: HashMap<String, TopCollections>,
+
}
+
+
// this is not safe from ~DOS
+
// todo: remove this and just iterate the all-time rollups to get nsids? (or recent rollups?)
+
impl From<TopCollections> for Vec<String> {
+
fn from(tc: TopCollections) -> Self {
+
let mut me = vec![];
+
for (segment, children) in tc.nsid_child_segments {
+
let child_segments: Self = children.into();
+
if child_segments.is_empty() {
+
me.push(segment);
+
} else {
+
for ch in child_segments {
+
let nsid = format!("{segment}.{ch}");
+
me.push(nsid);
+
}
+
}
+
}
+
me
+
}
+
}
+
+
#[cfg(test)]
+
mod tests {
+
use super::*;
+
+
#[test]
+
fn test_top_collections_to_nsids() {
+
let empty_tc = TopCollections::default();
+
assert_eq!(Into::<Vec<String>>::into(empty_tc), Vec::<String>::new());
+
+
let tc = TopCollections {
+
nsid_child_segments: HashMap::from([
+
(
+
"a".to_string(),
+
TopCollections {
+
nsid_child_segments: HashMap::from([
+
("b".to_string(), TopCollections::default()),
+
("c".to_string(), TopCollections::default()),
+
]),
+
..Default::default()
+
},
+
),
+
("z".to_string(), TopCollections::default()),
+
]),
+
..Default::default()
+
};
+
+
let mut nsids: Vec<String> = tc.into();
+
nsids.sort();
+
assert_eq!(nsids, ["a.b", "a.c", "z"]);
+
}
+
+
#[test]
+
fn test_truncating_insert_truncates() -> anyhow::Result<()> {
+
let mut commits: CollectionCommits<2> = Default::default();
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(100),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+
rev: "rev-asdf".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})?;
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(101),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+
rev: "rev-asdg".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})?;
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(102),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+
rev: "rev-asdh".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})?;
+
+
assert_eq!(commits.total_seen, 3);
+
assert_eq!(commits.dids_estimate.estimate(), 1);
+
assert_eq!(commits.commits.len(), 2);
+
+
let mut found_first = false;
+
let mut found_last = false;
+
for commit in commits.commits {
+
match commit.rev.as_ref() {
+
"rev-asdf" => {
+
found_first = true;
+
}
+
"rev-asdh" => {
+
found_last = true;
+
}
+
_ => {}
+
}
+
}
+
assert!(!found_first);
+
assert!(found_last);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_truncating_insert_does_not_truncate_deletes() -> anyhow::Result<()> {
+
let mut commits: CollectionCommits<2> = Default::default();
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(100),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+
rev: "rev-asdf".to_string(),
+
action: CommitAction::Cut,
+
})?;
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(101),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+
rev: "rev-asdg".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})?;
+
+
commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(102),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+
rev: "rev-asdh".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})?;
+
+
assert_eq!(commits.total_seen, 2);
+
assert_eq!(commits.dids_estimate.estimate(), 1);
+
assert_eq!(commits.commits.len(), 2);
+
+
let mut found_first = false;
+
let mut found_last = false;
+
let mut found_delete = false;
+
for commit in commits.commits {
+
match commit.rev.as_ref() {
+
"rev-asdg" => {
+
found_first = true;
+
}
+
"rev-asdh" => {
+
found_last = true;
+
}
+
_ => {}
+
}
+
if let CommitAction::Cut = commit.action {
+
found_delete = true;
+
}
+
}
+
assert!(!found_first);
+
assert!(found_last);
+
assert!(found_delete);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_truncating_insert_maxes_out_deletes() -> anyhow::Result<()> {
+
let mut commits: CollectionCommits<2> = Default::default();
+
+
commits
+
.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(100),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+
rev: "rev-asdf".to_string(),
+
action: CommitAction::Cut,
+
})
+
.unwrap();
+
+
// this create will just be discarded
+
commits
+
.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(80),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-zzz".to_string()).unwrap(),
+
rev: "rev-asdzzz".to_string(),
+
action: CommitAction::Put(PutAction {
+
record: RawValue::from_string("{}".to_string())?,
+
is_update: false,
+
}),
+
})
+
.unwrap();
+
+
commits
+
.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(101),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+
rev: "rev-asdg".to_string(),
+
action: CommitAction::Cut,
+
})
+
.unwrap();
+
+
let res = commits.truncating_insert(UFOsCommit {
+
cursor: Cursor::from_raw_u64(102),
+
did: Did::new("did:plc:whatever".to_string()).unwrap(),
+
rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+
rev: "rev-asdh".to_string(),
+
action: CommitAction::Cut,
+
});
+
+
assert!(res.is_err());
+
let overflowed = match res {
+
Err(BatchInsertError::BatchFull(c)) => c,
+
e => panic!("expected overflow but a different error happened: {e:?}"),
+
};
+
assert_eq!(overflowed.rev, "rev-asdh");
+
+
Ok(())
+
}
}
+86 -33
ufos/src/main.rs
···
use clap::Parser;
+
use jetstream::events::Cursor;
use std::path::PathBuf;
-
use ufos::{consumer, server, store};
+
use ufos::consumer;
+
use ufos::error::StorageError;
+
use ufos::file_consumer;
+
use ufos::server;
+
use ufos::storage::{StorageWhatever, StoreReader, StoreWriter};
+
use ufos::storage_fjall::FjallStorage;
+
use ufos::storage_mem::MemStorage;
#[cfg(not(target_env = "msvc"))]
use tikv_jemallocator::Jemalloc;
···
#[arg(long)]
data: PathBuf,
/// DEBUG: don't start the jetstream consumer or its write loop
+
/// todo: restore this
#[arg(long, action)]
pause_writer: bool,
/// DEBUG: force the rw loop to fall behind by pausing it
+
/// todo: restore this
#[arg(long, action)]
pause_rw: bool,
+
/// DEBUG: use an in-memory store instead of fjall
+
#[arg(long, action)]
+
in_mem: bool,
+
/// DEBUG: interpret jetstream as a file fixture
+
#[arg(long, action)]
+
jetstream_fixture: bool,
}
// #[tokio::main(flavor = "current_thread")] // TODO: move this to config via args
···
env_logger::init();
let args = Args::parse();
-
let (storage, cursor) =
-
store::Storage::open(args.data, &args.jetstream, args.jetstream_force).await?;
+
let jetstream = args.jetstream.clone();
+
if args.in_mem {
+
let (read_store, write_store, cursor) = MemStorage::init(
+
args.data,
+
jetstream,
+
args.jetstream_force,
+
Default::default(),
+
)?;
+
go(
+
args.jetstream,
+
args.jetstream_fixture,
+
args.pause_writer,
+
read_store,
+
write_store,
+
cursor,
+
)
+
.await?;
+
} else {
+
let (read_store, write_store, cursor) = FjallStorage::init(
+
args.data,
+
jetstream,
+
args.jetstream_force,
+
Default::default(),
+
)?;
+
go(
+
args.jetstream,
+
args.jetstream_fixture,
+
args.pause_writer,
+
read_store,
+
write_store,
+
cursor,
+
)
+
.await?;
+
}
+
Ok(())
+
}
+
+
async fn go(
+
jetstream: String,
+
jetstream_fixture: bool,
+
pause_writer: bool,
+
read_store: impl StoreReader + 'static,
+
mut write_store: impl StoreWriter + 'static,
+
cursor: Option<Cursor>,
+
) -> anyhow::Result<()> {
println!("starting server with storage...");
-
let serving = server::serve(storage.clone());
+
let serving = server::serve(read_store);
let t1 = tokio::task::spawn(async {
let r = serving.await;
···
});
let t2: tokio::task::JoinHandle<anyhow::Result<()>> = tokio::task::spawn({
-
let storage = storage.clone();
async move {
-
if !args.pause_writer {
+
if !pause_writer {
println!(
"starting consumer with cursor: {cursor:?} from {:?} ago",
-
cursor.clone().map(|c| c.elapsed())
+
cursor.map(|c| c.elapsed())
);
-
let batches =
-
consumer::consume(&args.jetstream, cursor, args.jetstream_no_zstd).await?;
-
let r = storage.receive(batches).await;
-
log::warn!("storage.receive ended with: {r:?}");
+
let mut batches = if jetstream_fixture {
+
file_consumer::consume(jetstream.into()).await?
+
} else {
+
consumer::consume(&jetstream, cursor, false).await?
+
};
+
+
tokio::task::spawn_blocking(move || {
+
while let Some(event_batch) = batches.blocking_recv() {
+
write_store.insert_batch(event_batch)?;
+
write_store
+
.step_rollup()
+
.inspect_err(|e| log::error!("step_rollup failed: {e:?}"))?;
+
}
+
Ok::<(), StorageError>(())
+
})
+
.await??;
+
+
log::warn!("storage.receive ended with");
} else {
log::info!("not starting jetstream or the write loop.");
}
···
}
});
-
let t3 = tokio::task::spawn(async move {
-
if !args.pause_rw {
-
let r = storage.rw_loop().await;
-
log::warn!("storage.rw_loop ended with: {r:?}");
-
} else {
-
log::info!("not starting rw loop.");
-
}
-
});
-
-
// tokio::select! {
-
// // v = serving => eprintln!("serving ended: {v:?}"),
-
// v = storage.receive(batches) => eprintln!("storage consumer ended: {v:?}"),
-
// v = storage.rw_loop() => eprintln!("storage rw-loop ended: {v:?}"),
-
// };
-
-
log::trace!("tasks running. waiting.");
-
t1.await?;
-
log::trace!("serve task ended.");
-
t2.await??;
-
log::trace!("storage receive task ended.");
-
t3.await?;
-
log::trace!("storage rw task ended.");
+
tokio::select! {
+
z = t1 => log::warn!("serve task ended: {z:?}"),
+
z = t2 => log::warn!("storage task ended: {z:?}"),
+
};
println!("bye!");
+91 -92
ufos/src/server.rs
···
-
use crate::store::{Storage, StorageInfo};
-
use crate::{CreateRecord, Nsid};
+
use crate::storage::StoreReader;
+
use crate::{ConsumerInfo, Nsid, TopCollections, UFOsRecord};
use dropshot::endpoint;
use dropshot::ApiDescription;
use dropshot::ConfigDropshot;
···
use std::collections::HashMap;
use std::sync::Arc;
-
#[derive(Clone)]
struct Context {
pub spec: Arc<serde_json::Value>,
-
storage: Storage,
+
storage: Box<dyn StoreReader>,
}
/// Meta: get the openapi spec for this api
···
#[derive(Debug, Serialize, JsonSchema)]
struct MetaInfo {
-
storage_info: StorageInfo,
-
jetstream_endpoint: Option<String>,
-
jetstream_cursor: Option<u64>,
-
mod_cursor: Option<u64>,
+
storage: serde_json::Value,
+
consumer: ConsumerInfo,
}
/// Get meta information about UFOs itself
#[endpoint {
···
}]
async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> {
let Context { storage, .. } = ctx.context();
-
let failed_to_get =
|what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}"));
let storage_info = storage
-
.get_meta_info()
-
.await
-
.map_err(failed_to_get("meta info"))?;
-
-
let jetstream_endpoint = storage
-
.get_jetstream_endpoint()
-
.await
-
.map_err(failed_to_get("jetstream endpoint"))?
-
.map(|v| v.0);
-
-
let jetstream_cursor = storage
-
.get_jetstream_cursor()
+
.get_storage_stats()
.await
-
.map_err(failed_to_get("jetstream cursor"))?
-
.map(|c| c.to_raw_u64());
+
.map_err(failed_to_get("storage info"))?;
-
let mod_cursor = storage
-
.get_mod_cursor()
+
let consumer = storage
+
.get_consumer_info()
.await
-
.map_err(failed_to_get("jetstream cursor"))?
-
.map(|c| c.to_raw_u64());
+
.map_err(failed_to_get("consumer info"))?;
ok_cors(MetaInfo {
-
storage_info,
-
jetstream_endpoint,
-
jetstream_cursor,
-
mod_cursor,
+
storage: storage_info,
+
consumer,
})
}
+
fn to_multiple_nsids(s: &str) -> Result<Vec<Nsid>, String> {
+
let mut out = Vec::new();
+
for collection in s.split(',') {
+
let Ok(nsid) = Nsid::new(collection.to_string()) else {
+
return Err(format!("collection {collection:?} was not a valid NSID"));
+
};
+
out.push(nsid);
+
}
+
Ok(out)
+
}
#[derive(Debug, Deserialize, JsonSchema)]
-
struct CollectionsQuery {
-
collection: String, // JsonSchema not implemented for Nsid :(
-
}
-
impl CollectionsQuery {
-
fn to_multiple_nsids(&self) -> Result<Vec<Nsid>, String> {
-
let mut out = Vec::with_capacity(self.collection.len());
-
for collection in self.collection.split(',') {
-
let Ok(nsid) = Nsid::new(collection.to_string()) else {
-
return Err(format!("collection {collection:?} was not a valid NSID"));
-
};
-
out.push(nsid);
-
}
-
Ok(out)
-
}
+
struct RecordsCollectionsQuery {
+
collection: Option<String>, // JsonSchema not implemented for Nsid :(
}
#[derive(Debug, Serialize, JsonSchema)]
struct ApiRecord {
did: String,
collection: String,
rkey: String,
-
record: serde_json::Value,
+
record: Box<serde_json::value::RawValue>,
time_us: u64,
}
-
impl ApiRecord {
-
fn from_create_record(create_record: CreateRecord, collection: &Nsid) -> Self {
-
let CreateRecord {
-
did,
-
rkey,
-
record,
-
cursor,
-
} = create_record;
+
impl From<UFOsRecord> for ApiRecord {
+
fn from(ufo: UFOsRecord) -> Self {
Self {
-
did: did.to_string(),
-
collection: collection.to_string(),
-
rkey: rkey.to_string(),
-
record,
-
time_us: cursor.to_raw_u64(),
+
did: ufo.did.to_string(),
+
collection: ufo.collection.to_string(),
+
rkey: ufo.rkey.to_string(),
+
record: ufo.record,
+
time_us: ufo.cursor.to_raw_u64(),
}
}
}
···
method = GET,
path = "/records",
}]
-
async fn get_records_by_collection(
+
async fn get_records_by_collections(
ctx: RequestContext<Context>,
-
collection_query: Query<CollectionsQuery>,
+
collection_query: Query<RecordsCollectionsQuery>,
) -> OkCorsResponse<Vec<ApiRecord>> {
let Context { storage, .. } = ctx.context();
+
let mut limit = 42;
+
let query = collection_query.into_inner();
+
let collections = if let Some(provided_collection) = query.collection {
+
to_multiple_nsids(&provided_collection)
+
.map_err(|reason| HttpError::for_bad_request(None, reason))?
+
} else {
+
let all_collections_should_be_nsids: Vec<String> = storage
+
.get_top_collections()
+
.await
+
.map_err(|e| {
+
HttpError::for_internal_error(format!("failed to get top collections: {e:?}"))
+
})?
+
.into();
+
let mut all_collections = Vec::with_capacity(all_collections_should_be_nsids.len());
+
for raw_nsid in all_collections_should_be_nsids {
+
let nsid = Nsid::new(raw_nsid).map_err(|e| {
+
HttpError::for_internal_error(format!("failed to parse nsid: {e:?}"))
+
})?;
+
all_collections.push(nsid);
+
}
-
let collections = collection_query
-
.into_inner()
-
.to_multiple_nsids()
-
.map_err(|reason| HttpError::for_bad_request(None, reason))?;
+
limit = 12;
+
all_collections
+
};
-
let mut api_records = Vec::new();
+
let records = storage
+
.get_records_by_collections(&collections, limit, true)
+
.await
+
.map_err(|e| HttpError::for_internal_error(e.to_string()))?
+
.into_iter()
+
.map(|r| r.into())
+
.collect();
-
// TODO: set up multiple db iterators and iterate them together with merge sort
-
for collection in &collections {
-
let records = storage
-
.get_collection_records(collection, 100)
-
.await
-
.map_err(|e| HttpError::for_internal_error(e.to_string()))?;
+
ok_cors(records)
+
}
-
for record in records {
-
let api_record = ApiRecord::from_create_record(record, collection);
-
api_records.push(api_record);
-
}
-
}
-
-
ok_cors(api_records)
+
#[derive(Debug, Deserialize, JsonSchema)]
+
struct TotalSeenCollectionsQuery {
+
collection: String, // JsonSchema not implemented for Nsid :(
+
}
+
#[derive(Debug, Serialize, JsonSchema)]
+
struct TotalCounts {
+
total_records: u64,
+
dids_estimate: u64,
}
-
/// Get total records seen by collection
#[endpoint {
method = GET,
···
}]
async fn get_records_total_seen(
ctx: RequestContext<Context>,
-
collection_query: Query<CollectionsQuery>,
-
) -> OkCorsResponse<HashMap<String, u64>> {
+
collection_query: Query<TotalSeenCollectionsQuery>,
+
) -> OkCorsResponse<HashMap<String, TotalCounts>> {
let Context { storage, .. } = ctx.context();
-
let collections = collection_query
-
.into_inner()
-
.to_multiple_nsids()
+
let query = collection_query.into_inner();
+
let collections = to_multiple_nsids(&query.collection)
.map_err(|reason| HttpError::for_bad_request(None, reason))?;
let mut seen_by_collection = HashMap::with_capacity(collections.len());
for collection in &collections {
-
let total = storage
-
.get_collection_total_seen(collection)
+
let (total_records, dids_estimate) = storage
+
.get_counts_by_collection(collection)
.await
.map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
-
seen_by_collection.insert(collection.to_string(), total);
+
seen_by_collection.insert(
+
collection.to_string(),
+
TotalCounts {
+
total_records,
+
dids_estimate,
+
},
+
);
}
ok_cors(seen_by_collection)
···
method = GET,
path = "/collections"
}]
-
async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<HashMap<String, u64>> {
+
async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<TopCollections> {
let Context { storage, .. } = ctx.context();
let collections = storage
.get_top_collections()
···
ok_cors(collections)
}
-
pub async fn serve(storage: Storage) -> Result<(), String> {
+
pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
let log = ConfigLogging::StderrTerminal {
level: ConfigLoggingLevel::Info,
}
···
api.register(get_openapi).unwrap();
api.register(get_meta_info).unwrap();
-
api.register(get_records_by_collection).unwrap();
+
api.register(get_records_by_collections).unwrap();
api.register(get_records_total_seen).unwrap();
api.register(get_top_collections).unwrap();
···
.json()
.map_err(|e| e.to_string())?,
),
-
storage,
+
storage: Box::new(storage),
};
ServerBuilder::new(api, context, log)
+49
ufos/src/storage.rs
···
+
// use crate::store_types::CountsValue;
+
use crate::{error::StorageError, ConsumerInfo, Cursor, EventBatch, TopCollections, UFOsRecord};
+
use async_trait::async_trait;
+
use jetstream::exports::{Did, Nsid};
+
use std::path::Path;
+
+
pub type StorageResult<T> = Result<T, StorageError>;
+
+
pub trait StorageWhatever<R: StoreReader, W: StoreWriter, C> {
+
fn init(
+
path: impl AsRef<Path>,
+
endpoint: String,
+
force_endpoint: bool,
+
config: C,
+
) -> StorageResult<(R, W, Option<Cursor>)>
+
where
+
Self: Sized;
+
}
+
+
pub trait StoreWriter: Send + Sync {
+
fn insert_batch<const LIMIT: usize>(
+
&mut self,
+
event_batch: EventBatch<LIMIT>,
+
) -> StorageResult<()>;
+
+
fn step_rollup(&mut self) -> StorageResult<usize>;
+
+
fn trim_collection(&mut self, collection: &Nsid, limit: usize) -> StorageResult<()>;
+
+
fn delete_account(&mut self, did: &Did) -> StorageResult<usize>;
+
}
+
+
#[async_trait]
+
pub trait StoreReader: Send + Sync {
+
async fn get_storage_stats(&self) -> StorageResult<serde_json::Value>;
+
+
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo>;
+
+
async fn get_top_collections(&self) -> StorageResult<TopCollections>;
+
+
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)>;
+
+
async fn get_records_by_collections(
+
&self,
+
collections: &[Nsid],
+
limit: usize,
+
expand_each_collection: bool,
+
) -> StorageResult<Vec<UFOsRecord>>;
+
}
+1795
ufos/src/storage_fjall.rs
···
+
use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr};
+
use crate::error::StorageError;
+
use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter};
+
use crate::store_types::{
+
AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal,
+
HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue,
+
JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey,
+
NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey,
+
RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue,
+
WeekTruncatedCursor, WeeklyRollupKey,
+
};
+
use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord};
+
use async_trait::async_trait;
+
use fjall::{Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle};
+
use jetstream::events::Cursor;
+
use std::collections::HashMap;
+
use std::path::Path;
+
use std::time::SystemTime;
+
+
const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds
+
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
+
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
+
+
///
+
/// new data format, roughly:
+
///
+
/// Partition: 'global'
+
///
+
/// - Global sequence counter (is the jetstream cursor -- monotonic with many gaps)
+
/// - key: "js_cursor" (literal)
+
/// - val: u64
+
///
+
/// - Jetstream server endpoint (persisted because the cursor can't be used on another instance without data loss)
+
/// - key: "js_endpoint" (literal)
+
/// - val: string (URL of the instance)
+
///
+
/// - Launch date
+
/// - key: "takeoff" (literal)
+
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
+
///
+
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
+
/// - key: "rollup_cursor" (literal)
+
/// - val: u64 (tracks behind js_cursor)
+
///
+
///
+
/// Partition: 'feed'
+
///
+
/// - Per-collection list of record references ordered by jetstream cursor
+
/// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor)
+
/// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.)
+
///
+
///
+
/// Partition: 'records'
+
///
+
/// - Actual records by their atproto location
+
/// - key: nullstr || nullstr || nullstr (did, collection, rkey)
+
/// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record)
+
///
+
///
+
/// Partition: 'rollups'
+
///
+
/// - Live (batched) records counts and dids estimate per collection
+
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - Hourly total record counts and dids estimate per collection
+
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - Weekly total record counts and dids estimate per collection
+
/// - key: "weekly_counts" || u64 || nullstr (hour, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - All-time total record counts and dids estimate per collection
+
/// - key: "ever_counts" || nullstr (nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - TODO: sorted indexes for all-times?
+
///
+
///
+
/// Partition: 'queues'
+
///
+
/// - Delete account queue
+
/// - key: "delete_acount" || u64 (js_cursor)
+
/// - val: nullstr (did)
+
///
+
///
+
/// TODO: moderation actions
+
/// TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy fetching + caching on read.
+
#[derive(Debug)]
+
pub struct FjallStorage {}
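+
// Illustrative sketch only (the canonical encoders live in store_types): a
+
// 'feed' key is the null-terminated collection NSID followed by the
+
// big-endian jetstream cursor, so entries sort by collection, then by time:
+
//
+
//     let mut feed_key = b"app.bsky.feed.post\0".to_vec();
+
//     feed_key.extend_from_slice(&cursor.to_raw_u64().to_be_bytes());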
+
+
#[derive(Debug, Default)]
+
pub struct FjallConfig {
+
/// drop the db when the storage is dropped
+
///
+
/// this is only meant for tests
+
#[cfg(test)]
+
pub temp: bool,
+
}
+
+
impl StorageWhatever<FjallReader, FjallWriter, FjallConfig> for FjallStorage {
+
fn init(
+
path: impl AsRef<Path>,
+
endpoint: String,
+
force_endpoint: bool,
+
_config: FjallConfig,
+
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>)> {
+
let keyspace = {
+
let config = Config::new(path);
+
+
#[cfg(not(test))]
+
let config = config.fsync_ms(Some(4_000));
+
+
config.open()?
+
};
+
+
let global = keyspace.open_partition("global", PartitionCreateOptions::default())?;
+
let feeds = keyspace.open_partition("feeds", PartitionCreateOptions::default())?;
+
let records = keyspace.open_partition("records", PartitionCreateOptions::default())?;
+
let rollups = keyspace.open_partition("rollups", PartitionCreateOptions::default())?;
+
let queues = keyspace.open_partition("queues", PartitionCreateOptions::default())?;
+
+
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
+
+
if js_cursor.is_some() {
+
let stored_endpoint =
+
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
+
+
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
+
"found cursor but missing js_endpoint, refusing to start.".to_string(),
+
))?;
+
+
if stored != endpoint {
+
if force_endpoint {
+
log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}");
+
insert_static_neu::<JetstreamEndpointKey>(
+
&global,
+
JetstreamEndpointValue(endpoint.to_string()),
+
)?;
+
} else {
+
return Err(StorageError::InitError(format!(
+
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.")));
+
}
+
}
+
} else {
+
insert_static_neu::<JetstreamEndpointKey>(
+
&global,
+
JetstreamEndpointValue(endpoint.to_string()),
+
)?;
+
insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
+
insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
+
}
+
+
let reader = FjallReader {
+
keyspace: keyspace.clone(),
+
global: global.clone(),
+
feeds: feeds.clone(),
+
records: records.clone(),
+
rollups: rollups.clone(),
+
};
+
let writer = FjallWriter {
+
keyspace,
+
global,
+
feeds,
+
records,
+
rollups,
+
queues,
+
};
+
Ok((reader, writer, js_cursor))
+
}
+
}
+
+
type FjallRKV = fjall::Result<(fjall::Slice, fjall::Slice)>;
+
+
#[derive(Clone)]
+
pub struct FjallReader {
+
keyspace: Keyspace,
+
global: PartitionHandle,
+
feeds: PartitionHandle,
+
records: PartitionHandle,
+
rollups: PartitionHandle,
+
}
+
+
/// An iterator that knows how to skip over deleted/invalidated records
+
struct RecordIterator {
+
db_iter: Box<dyn Iterator<Item = FjallRKV>>,
+
records: PartitionHandle,
+
limit: usize,
+
fetched: usize,
+
}
+
impl RecordIterator {
+
pub fn new(
+
feeds: &PartitionHandle,
+
records: PartitionHandle,
+
collection: &Nsid,
+
limit: usize,
+
) -> StorageResult<Self> {
+
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
+
let db_iter = feeds.prefix(prefix).rev();
+
Ok(Self {
+
db_iter: Box::new(db_iter),
+
records,
+
limit,
+
fetched: 0,
+
})
+
}
+
fn get_record(&self, db_next: FjallRKV) -> StorageResult<Option<UFOsRecord>> {
+
let (key_bytes, val_bytes) = db_next?;
+
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
+
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
+
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
+
+
let Some(location_val_bytes) = self.records.get(location_key.to_db_bytes()?)? else {
+
// record was deleted (hopefully)
+
return Ok(None);
+
};
+
+
let (meta, n) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
+
+
if meta.cursor() != feed_key.cursor() {
+
// older/different version
+
return Ok(None);
+
}
+
if meta.rev != feed_val.rev() {
+
// weird...
+
log::warn!("record lookup: cursor match but rev did not...? excluding.");
+
return Ok(None);
+
}
+
let Some(raw_value_bytes) = location_val_bytes.get(n..) else {
+
log::warn!(
+
"record lookup: found record but could not get bytes to decode the record??"
+
);
+
return Ok(None);
+
};
+
let rawval = db_complete::<RecordRawValue>(raw_value_bytes)?;
+
Ok(Some(UFOsRecord {
+
collection: feed_key.collection().clone(),
+
cursor: feed_key.cursor(),
+
did: feed_val.did().clone(),
+
rkey: feed_val.rkey().clone(),
+
rev: meta.rev.to_string(),
+
record: rawval.try_into()?,
+
is_update: meta.is_update,
+
}))
+
}
+
}
+
impl Iterator for RecordIterator {
+
type Item = StorageResult<Option<UFOsRecord>>;
+
fn next(&mut self) -> Option<Self::Item> {
+
if self.fetched == self.limit {
+
return Some(Ok(None));
+
}
+
let record = loop {
+
let db_next = self.db_iter.next()?; // None short-circuits here
+
match self.get_record(db_next) {
+
Err(e) => return Some(Err(e)),
+
Ok(Some(record)) => break record,
+
Ok(None) => continue,
+
}
+
};
+
self.fetched += 1;
+
Some(Ok(Some(record)))
+
}
+
}
+
+
impl FjallReader {
+
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
+
let rollup_cursor =
+
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?
+
.map(|c| c.to_raw_u64());
+
+
Ok(serde_json::json!({
+
"keyspace_disk_space": self.keyspace.disk_space(),
+
"keyspace_journal_count": self.keyspace.journal_count(),
+
"keyspace_sequence": self.keyspace.instant(),
+
"rollup_cursor": rollup_cursor,
+
}))
+
}
+
+
fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
+
let global = self.global.snapshot();
+
+
let endpoint =
+
get_snapshot_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?
+
.ok_or(StorageError::BadStateError(
+
"Could not find jetstream endpoint".to_string(),
+
))?
+
.0;
+
+
let started_at = get_snapshot_static_neu::<TakeoffKey, TakeoffValue>(&global)?
+
.ok_or(StorageError::BadStateError(
+
"Could not find jetstream takeoff time".to_string(),
+
))?
+
.to_raw_u64();
+
+
let latest_cursor =
+
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
+
.map(|c| c.to_raw_u64());
+
+
Ok(ConsumerInfo::Jetstream {
+
endpoint,
+
started_at,
+
latest_cursor,
+
})
+
}
+
+
fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
+
// TODO: limit nsid traversal depth
+
// TODO: limit nsid traversal breadth
+
// TODO: be serious about anything
+
+
// TODO: probably use a stack of segments to reduce to ~log-n merges
+
+
#[derive(Default)]
+
struct Blah {
+
counts: CountsValue,
+
children: HashMap<String, Blah>,
+
}
+
impl From<&Blah> for TopCollections {
+
fn from(bla: &Blah) -> Self {
+
Self {
+
total_records: bla.counts.records(),
+
dids_estimate: bla.counts.dids().estimate() as u64,
+
nsid_child_segments: HashMap::from_iter(
+
bla.children.iter().map(|(k, v)| (k.to_string(), v.into())),
+
),
+
}
+
}
+
}
+
+
let mut b = Blah::default();
+
let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?;
+
for kv in self.rollups.prefix(&prefix.to_db_bytes()?) {
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<AllTimeRollupKey>(&key_bytes)?;
+
let val = db_complete::<CountsValue>(&val_bytes)?;
+
+
let mut node = &mut b;
+
node.counts.merge(&val);
+
for segment in key.collection().split('.') {
+
node = node.children.entry(segment.to_string()).or_default();
+
node.counts.merge(&val);
+
}
+
}
+
+
Ok((&b).into())
+
}
+
+
fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
+
// 0. grab a snapshot in case rollups happen while we're working
+
let instant = self.keyspace.instant();
+
let global = self.global.snapshot_at(instant);
+
let rollups = self.rollups.snapshot_at(instant);
+
+
// 1. all-time counts
+
let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?;
+
let mut total_counts = rollups
+
.get(&all_time_key)?
+
.as_deref()
+
.map(db_complete::<CountsValue>)
+
.transpose()?
+
.unwrap_or_default();
+
+
// 2. live counts that haven't been rolled into all-time yet.
+
let rollup_cursor =
+
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or(
+
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
+
)?;
+
+
let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
+
for kv in rollups.range(full_range) {
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
+
if key.collection() == collection {
+
let counts = db_complete::<CountsValue>(&val_bytes)?;
+
total_counts.merge(&counts);
+
}
+
}
+
Ok((
+
total_counts.records(),
+
total_counts.dids().estimate() as u64,
+
))
+
}
+
+
fn get_records_by_collections(
+
&self,
+
collections: &[Nsid],
+
limit: usize,
+
expand_each_collection: bool,
+
) -> StorageResult<Vec<UFOsRecord>> {
+
if collections.is_empty() {
+
return Ok(vec![]);
+
}
+
let mut record_iterators = Vec::new();
+
for collection in collections {
+
let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?;
+
record_iterators.push(iter.peekable());
+
}
+
let mut merged = Vec::new();
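+
// k-way merge: each round, take from the iterator whose peeked record has the
+
// newest cursor, so the merged output stays globally cursor-descending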
+
loop {
+
let mut latest: Option<(Cursor, usize)> = None; // ugh
+
for (i, iter) in record_iterators.iter_mut().enumerate() {
+
let Some(it) = iter.peek_mut() else {
+
continue;
+
};
+
let it = match it {
+
Ok(v) => v,
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
+
};
+
let Some(rec) = it else {
+
if expand_each_collection {
+
continue;
+
} else {
+
break;
+
}
+
};
+
if let Some((cursor, _)) = latest {
+
if rec.cursor > cursor {
+
latest = Some((rec.cursor, i))
+
}
+
} else {
+
latest = Some((rec.cursor, i));
+
}
+
}
+
let Some((_, idx)) = latest else {
+
break;
+
};
+
// yeah yeah: we just peeked Ok(Some(_)) at idx, so these unwraps can't fail
+
merged.push(record_iterators[idx].next().unwrap().unwrap().unwrap());
+
}
+
Ok(merged)
+
}
+
}
+
+
#[async_trait]
+
impl StoreReader for FjallReader {
+
async fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || FjallReader::get_storage_stats(&s)).await?
+
}
+
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || FjallReader::get_consumer_info(&s)).await?
+
}
+
async fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || FjallReader::get_top_collections(&s)).await?
+
}
+
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
+
let s = self.clone();
+
let collection = collection.clone();
+
tokio::task::spawn_blocking(move || FjallReader::get_counts_by_collection(&s, &collection))
+
.await?
+
}
+
async fn get_records_by_collections(
+
&self,
+
collections: &[Nsid],
+
limit: usize,
+
expand_each_collection: bool,
+
) -> StorageResult<Vec<UFOsRecord>> {
+
let s = self.clone();
+
let collections = collections.to_vec();
+
tokio::task::spawn_blocking(move || {
+
FjallReader::get_records_by_collections(&s, &collections, limit, expand_each_collection)
+
})
+
.await?
+
}
+
}
+
+
pub struct FjallWriter {
+
keyspace: Keyspace,
+
global: PartitionHandle,
+
feeds: PartitionHandle,
+
records: PartitionHandle,
+
rollups: PartitionHandle,
+
queues: PartitionHandle,
+
}
+
+
impl FjallWriter {
+
fn rollup_delete_account(
+
&mut self,
+
cursor: Cursor,
+
key_bytes: &[u8],
+
val_bytes: &[u8],
+
) -> StorageResult<usize> {
+
let did = db_complete::<DeleteAccountQueueVal>(val_bytes)?;
+
self.delete_account(&did)?;
+
let mut batch = self.keyspace.batch();
+
batch.remove(&self.queues, key_bytes);
+
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, cursor)?;
+
batch.commit()?;
+
Ok(1)
+
}
+
+
fn rollup_live_counts(
+
&mut self,
+
timelies: impl Iterator<Item = Result<(fjall::Slice, fjall::Slice), fjall::Error>>,
+
cursor_exclusive_limit: Option<Cursor>,
+
rollup_limit: usize,
+
) -> StorageResult<usize> {
+
// current strategy is to buffer counts in mem before writing the rollups
+
// we *could* read+write every single batch to rollup.. but their merge is associative so
+
// ...so save the db some work up front? is this worth it? who knows...
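+
// (associativity is what makes this safe: folding many live-count values in
+
// memory and merging the result once gives the same rollup as merging each
+
// value into the stored rollup individually)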
+
+
#[derive(Eq, Hash, PartialEq)]
+
enum Rollup {
+
Hourly(HourTruncatedCursor),
+
Weekly(WeekTruncatedCursor),
+
AllTime,
+
}
+
+
let mut batch = self.keyspace.batch();
+
let mut cursors_advanced = 0;
+
let mut last_cursor = Cursor::from_start();
+
let mut counts_by_rollup: HashMap<(Nsid, Rollup), CountsValue> = HashMap::new();
+
+
for (i, kv) in timelies.enumerate() {
+
if i >= rollup_limit {
+
break;
+
}
+
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
+
+
if cursor_exclusive_limit
+
.map(|limit| key.cursor() > limit)
+
.unwrap_or(false)
+
{
+
break;
+
}
+
+
batch.remove(&self.rollups, key_bytes);
+
let val = db_complete::<CountsValue>(&val_bytes)?;
+
counts_by_rollup
+
.entry((
+
key.collection().clone(),
+
Rollup::Hourly(key.cursor().into()),
+
))
+
.or_default()
+
.merge(&val);
+
counts_by_rollup
+
.entry((
+
key.collection().clone(),
+
Rollup::Weekly(key.cursor().into()),
+
))
+
.or_default()
+
.merge(&val);
+
counts_by_rollup
+
.entry((key.collection().clone(), Rollup::AllTime))
+
.or_default()
+
.merge(&val);
+
+
cursors_advanced += 1;
+
last_cursor = key.cursor();
+
}
+
+
for ((nsid, rollup), counts) in counts_by_rollup {
+
let key_bytes = match rollup {
+
Rollup::Hourly(hourly_cursor) => {
+
let k = HourlyRollupKey::new(hourly_cursor, &nsid);
+
k.to_db_bytes()?
+
}
+
Rollup::Weekly(weekly_cursor) => {
+
let k = WeeklyRollupKey::new(weekly_cursor, &nsid);
+
k.to_db_bytes()?
+
}
+
Rollup::AllTime => {
+
let k = AllTimeRollupKey::new(&nsid);
+
k.to_db_bytes()?
+
}
+
};
+
let mut rolled: CountsValue = self
+
.rollups
+
.get(&key_bytes)?
+
.as_deref()
+
.map(db_complete::<CountsValue>)
+
.transpose()?
+
.unwrap_or_default();
+
+
// try to round-trip before inserting, for funsies
+
let tripppin = counts.to_db_bytes()?;
+
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
+
assert_eq!(n, tripppin.len());
+
assert_eq!(counts.prefix, and_back.prefix);
+
assert_eq!(counts.dids().estimate(), and_back.dids().estimate());
+
if counts.records() > 200_000_000_000 {
+
panic!("COUNTS maybe wtf? {counts:?}")
+
}
+
+
rolled.merge(&counts);
+
batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?);
+
}
+
+
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)?;
+
+
batch.commit()?;
+
Ok(cursors_advanced)
+
}
+
}
+
+
impl StoreWriter for FjallWriter {
+
fn insert_batch<const LIMIT: usize>(
+
&mut self,
+
event_batch: EventBatch<LIMIT>,
+
) -> StorageResult<()> {
+
if event_batch.is_empty() {
+
return Ok(());
+
}
+
+
let mut batch = self.keyspace.batch();
+
+
// would be nice not to have to iterate everything at once here
+
let latest = event_batch.latest_cursor().unwrap();
+
+
for (nsid, commits) in event_batch.commits_by_nsid {
+
for commit in commits.commits {
+
let location_key: RecordLocationKey = (&commit, &nsid).into();
+
+
match commit.action {
+
CommitAction::Cut => {
+
batch.remove(&self.records, &location_key.to_db_bytes()?);
+
}
+
CommitAction::Put(put_action) => {
+
let feed_key = NsidRecordFeedKey::from_pair(nsid.clone(), commit.cursor);
+
let feed_val: NsidRecordFeedVal =
+
(&commit.did, &commit.rkey, commit.rev.as_str()).into();
+
batch.insert(
+
&self.feeds,
+
feed_key.to_db_bytes()?,
+
feed_val.to_db_bytes()?,
+
);
+
+
let location_val: RecordLocationVal =
+
(commit.cursor, commit.rev.as_str(), put_action).into();
+
batch.insert(
+
&self.records,
+
&location_key.to_db_bytes()?,
+
&location_val.to_db_bytes()?,
+
);
+
}
+
}
+
}
+
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
+
let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate);
+
batch.insert(
+
&self.rollups,
+
&live_counts_key.to_db_bytes()?,
+
&counts_value.to_db_bytes()?,
+
);
+
}
+
+
for remove in event_batch.account_removes {
+
let queue_key = DeleteAccountQueueKey::new(remove.cursor);
+
let queue_val: DeleteAccountQueueVal = remove.did;
+
batch.insert(
+
&self.queues,
+
&queue_key.to_db_bytes()?,
+
&queue_val.to_db_bytes()?,
+
);
+
}
+
+
batch.insert(
+
&self.global,
+
DbStaticStr::<JetstreamCursorKey>::default().to_db_bytes()?,
+
latest.to_db_bytes()?,
+
);
+
+
batch.commit()?;
+
Ok(())
+
}
+
+
fn step_rollup(&mut self) -> StorageResult<usize> {
+
let rollup_cursor =
+
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?.ok_or(
+
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
+
)?;
+
+
// timelies
+
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
+
let mut timely_iter = self.rollups.range(live_counts_range).peekable();
+
+
let timely_next_cursor = timely_iter
+
.peek_mut()
+
.map(|kv| -> StorageResult<Cursor> {
+
match kv {
+
Err(e) => Err(std::mem::replace(e, fjall::Error::Poisoned))?,
+
Ok((key_bytes, _)) => {
+
let key = db_complete::<LiveCountsKey>(key_bytes)?;
+
Ok(key.cursor())
+
}
+
}
+
})
+
.transpose()?;
+
+
// delete accounts
+
let delete_accounts_range =
+
DeleteAccountQueueKey::new(rollup_cursor).range_to_prefix_end()?;
+
+
let next_delete = self
+
.queues
+
.range(delete_accounts_range)
+
.next()
+
.transpose()?
+
.map(|(key_bytes, val_bytes)| {
+
db_complete::<DeleteAccountQueueKey>(&key_bytes)
+
.map(|k| (k.suffix, key_bytes, val_bytes))
+
})
+
.transpose()?;
+
+
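// apply whichever pending item comes first in cursor order, so the rollup
+
// cursor only advances past work that has actually been applied
+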
let cursors_stepped = match (timely_next_cursor, next_delete) {
+
(
+
Some(timely_next_cursor),
+
Some((delete_cursor, delete_key_bytes, delete_val_bytes)),
+
) => {
+
if timely_next_cursor < delete_cursor {
+
self.rollup_live_counts(
+
timely_iter,
+
Some(delete_cursor),
+
MAX_BATCHED_ROLLUP_COUNTS,
+
)?
+
} else {
+
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
+
}
+
}
+
(Some(_), None) => {
+
self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)?
+
}
+
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
+
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
+
}
+
(None, None) => 0,
+
};
+
+
Ok(cursors_stepped)
+
}
+
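step_rollup interleaves two work queues by cursor so that account deletions are applied in stream order relative to count rollups: whichever pending item carries the earlier cursor is handled first. A minimal sketch of just that decision rule, with bare u64 cursors standing in for the real key types (hypothetical names, illustration only):

// sketch: choose the next unit of rollup work by cursor order.
enum Work { LiveCounts, DeleteAccount }

fn next_work(timely: Option<u64>, delete: Option<u64>) -> Option<Work> {
    match (timely, delete) {
        (Some(t), Some(d)) if t < d => Some(Work::LiveCounts),
        (Some(_), Some(_)) => Some(Work::DeleteAccount),
        (Some(_), None) => Some(Work::LiveCounts),
        (None, Some(_)) => Some(Work::DeleteAccount),
        (None, None) => None,
    }
}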
+
fn trim_collection(
+
&mut self,
+
collection: &Nsid,
+
limit: usize,
+
// TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end)
+
) -> StorageResult<()> {
+
let mut dangling_feed_keys_cleaned = 0;
+
let mut records_deleted = 0;
+
+
let mut batch = self.keyspace.batch();
+
+
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
+
let mut found = 0;
+
for kv in self.feeds.prefix(prefix).rev() {
+
let (key_bytes, val_bytes) = kv?;
+
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
+
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
+
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
+
let location_key_bytes = location_key.to_db_bytes()?;
+
+
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
+
// record was deleted (hopefully)
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
};
+
+
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
+
+
if meta.cursor() != feed_key.cursor() {
+
// older/different version
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
}
+
if meta.rev != feed_val.rev() {
+
// weird...
+
log::warn!("record lookup: cursor match but rev did not...? removing.");
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
}
+
+
if batch.len() >= MAX_BATCHED_CLEANUP_SIZE {
+
batch.commit()?;
+
batch = self.keyspace.batch();
+
}
+
+
found += 1;
+
if found <= limit {
+
continue;
+
}
+
+
batch.remove(&self.feeds, &location_key_bytes);
+
batch.remove(&self.records, &location_key_bytes);
+
records_deleted += 1;
+
}
+
+
batch.commit()?;
+
+
log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records");
+
Ok(())
+
}
+
+
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
+
let mut records_deleted = 0;
+
let mut batch = self.keyspace.batch();
+
let prefix = RecordLocationKey::from_prefix_to_db_bytes(did)?;
+
for kv in self.records.prefix(prefix) {
+
let (key_bytes, _) = kv?;
+
batch.remove(&self.records, key_bytes);
+
records_deleted += 1;
+
if batch.len() >= MAX_BATCHED_ACCOUNT_DELETE_RECORDS {
+
batch.commit()?;
+
batch = self.keyspace.batch();
+
}
+
}
+
batch.commit()?;
+
Ok(records_deleted)
+
}
+
}
+
+
/// Get a value from a fixed key
+
fn get_static_neu<K: StaticStr, V: DbBytes>(global: &PartitionHandle) -> StorageResult<Option<V>> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value = global
+
.get(&key_bytes)?
+
.map(|value_bytes| db_complete(&value_bytes))
+
.transpose()?;
+
Ok(value)
+
}
+
+
/// Get a value from a fixed key
+
fn get_snapshot_static_neu<K: StaticStr, V: DbBytes>(
+
global: &fjall::Snapshot,
+
) -> StorageResult<Option<V>> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value = global
+
.get(&key_bytes)?
+
.map(|value_bytes| db_complete(&value_bytes))
+
.transpose()?;
+
Ok(value)
+
}
+
+
/// Set a value to a fixed key
+
fn insert_static_neu<K: StaticStr>(
+
global: &PartitionHandle,
+
value: impl DbBytes,
+
) -> StorageResult<()> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value_bytes = value.to_db_bytes()?;
+
global.insert(&key_bytes, &value_bytes)?;
+
Ok(())
+
}
+
+
/// Set a value to a fixed key
+
fn insert_batch_static_neu<K: StaticStr>(
+
batch: &mut FjallBatch,
+
global: &PartitionHandle,
+
value: impl DbBytes,
+
) -> StorageResult<()> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value_bytes = value.to_db_bytes()?;
+
batch.insert(global, &key_bytes, &value_bytes);
+
Ok(())
+
}
+
+
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
+
pub struct StorageInfo {
+
pub keyspace_disk_space: u64,
+
pub keyspace_journal_count: usize,
+
pub keyspace_sequence: u64,
+
pub global_approximate_len: usize,
+
}
+
+
////////// temp stuff to remove:
+
+
// fn summarize_batch<const LIMIT: usize>(batch: &EventBatch<LIMIT>) -> String {
+
// format!(
+
// "batch of {: >3} samples from {: >4} records in {: >2} collections from ~{: >4} DIDs, {} acct removes, cursor {: <12?}",
+
// batch.total_records(),
+
// batch.total_seen(),
+
// batch.total_collections(),
+
// batch.estimate_dids(),
+
// batch.account_removes(),
+
// batch.latest_cursor().map(|c| c.elapsed()),
+
// )
+
// }
+
+
#[cfg(test)]
+
mod tests {
+
use super::*;
+
use crate::{DeleteAccount, RecordKey, UFOsCommit};
+
use jetstream::events::{CommitEvent, CommitOp};
+
use jetstream::exports::Cid;
+
use serde_json::value::RawValue;
+
+
fn fjall_db() -> (FjallReader, FjallWriter) {
+
let (read, write, _) = FjallStorage::init(
+
tempfile::tempdir().unwrap(),
+
"offline test (no real jetstream endpoint)".to_string(),
+
false,
+
FjallConfig { temp: true },
+
)
+
.unwrap();
+
(read, write)
+
}
+
+
const TEST_BATCH_LIMIT: usize = 16;
+
+
#[derive(Debug, Default)]
+
struct TestBatch {
+
pub batch: EventBatch<TEST_BATCH_LIMIT>,
+
}
+
+
impl TestBatch {
+
#[allow(clippy::too_many_arguments)]
+
pub fn create(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
record: &str,
+
rev: Option<&str>,
+
cid: Option<Cid>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let record = RawValue::from_string(record.to_string()).unwrap();
+
let cid = cid.unwrap_or(
+
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
+
.parse()
+
.unwrap(),
+
);
+
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Create,
+
record: Some(record),
+
cid: Some(cid),
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
+
.unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
#[allow(clippy::too_many_arguments)]
+
pub fn update(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
record: &str,
+
rev: Option<&str>,
+
cid: Option<Cid>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let record = RawValue::from_string(record.to_string()).unwrap();
+
let cid = cid.unwrap_or(
+
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
+
.parse()
+
.unwrap(),
+
);
+
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Update,
+
record: Some(record),
+
cid: Some(cid),
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
+
.unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
#[allow(clippy::too_many_arguments)]
+
pub fn delete(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
rev: Option<&str>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Delete,
+
record: None,
+
cid: None,
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did, Cursor::from_raw_u64(cursor)).unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
pub fn delete_account(&mut self, did: &str, cursor: u64) -> Did {
+
let did = Did::new(did.to_string()).unwrap();
+
self.batch.account_removes.push(DeleteAccount {
+
did: did.clone(),
+
cursor: Cursor::from_raw_u64(cursor),
+
});
+
did
+
}
+
}
+
+
#[test]
+
fn test_hello() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?;
+
assert_eq!(records, 0);
+
assert_eq!(dids, 0);
+
Ok(())
+
}
+
+
#[test]
+
fn test_insert_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"asdf",
+
"{}",
+
Some("rev-z"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?;
+
assert_eq!(records, 0);
+
assert_eq!(dids, 0);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 1);
+
let rec = &records[0];
+
assert_eq!(rec.record.get(), "{}");
+
assert!(!rec.is_update);
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_get_multi_collection() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"aaa",
+
r#""earliest""#,
+
Some("rev-a"),
+
None,
+
100,
+
);
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.b",
+
"aab",
+
r#""in between""#,
+
Some("rev-ab"),
+
None,
+
101,
+
);
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"aaa-2",
+
r#""last""#,
+
Some("rev-a-2"),
+
None,
+
102,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[
+
Nsid::new("a.a.a".to_string()).unwrap(),
+
Nsid::new("a.a.b".to_string()).unwrap(),
+
Nsid::new("a.a.c".to_string()).unwrap(),
+
],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 3);
+
assert_eq!(records[0].record.get(), r#""last""#);
+
assert_eq!(
+
records[0].collection,
+
Nsid::new("a.a.a".to_string()).unwrap()
+
);
+
assert_eq!(records[1].record.get(), r#""in between""#);
+
assert_eq!(
+
records[1].collection,
+
Nsid::new("a.a.b".to_string()).unwrap()
+
);
+
assert_eq!(records[2].record.get(), r#""earliest""#);
+
assert_eq!(
+
records[2].collection,
+
Nsid::new("a.a.a".to_string()).unwrap()
+
);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_get_multi_collection_expanded() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
// insert some older ones in aab
+
for i in 1..=3 {
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.b",
+
&format!("aab-{i}"),
+
&format!(r#""b {i}""#),
+
Some(&format!("rev-b-{i}")),
+
None,
+
100 + i,
+
);
+
}
+
// and some newer ones in aaa
+
for i in 1..=3 {
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
&format!("aaa-{i}"),
+
&format!(r#""a {i}""#),
+
Some(&format!("rev-a-{i}")),
+
None,
+
200 + i,
+
);
+
}
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[
+
Nsid::new("a.a.a".to_string()).unwrap(),
+
Nsid::new("a.a.b".to_string()).unwrap(),
+
Nsid::new("a.a.c".to_string()).unwrap(),
+
],
+
2,
+
true,
+
)?;
+
assert_eq!(records.len(), 4);
+
assert_eq!(records[0].record.get(), r#""a 3""#);
+
assert_eq!(
+
records[0].collection,
+
Nsid::new("a.a.a".to_string()).unwrap()
+
);
+
+
assert_eq!(records[3].record.get(), r#""b 2""#);
+
assert_eq!(
+
records[3].collection,
+
Nsid::new("a.a.b".to_string()).unwrap()
+
);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_update_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
"{}",
+
Some("rev-a"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.update(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
r#"{"ch": "ch-ch-ch-changes"}"#,
+
Some("rev-z"),
+
None,
+
101,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 1);
+
let rec = &records[0];
+
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
+
assert!(rec.is_update);
+
Ok(())
+
}
+
+
#[test]
+
fn test_delete_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
"{}",
+
Some("rev-a"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
Some("rev-z"),
+
101,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_collection_trim() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
let mut last_b_cursor;
+
for i in 1..=10 {
+
last_b_cursor = 11_000 + i;
+
batch.create(
+
&format!("did:plc:inze6wrmsm7pjl7yta3oig7{}", i % 3),
+
"a.a.b",
+
&format!("rkey-bbb-{i}"),
+
&format!(r#"{{"n": {i}}}"#),
+
Some(&format!("rev-bbb-{i}")),
+
None,
+
last_b_cursor,
+
);
+
}
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.c",
+
"rkey-ccc",
+
"{}",
+
Some("rev-ccc"),
+
None,
+
12_000,
+
);
+
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.b".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 10);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.c".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.d".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 0);
+
+
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.b".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 6);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.c".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.d".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_delete_account() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
for i in 1..=2 {
+
batch.create(
+
"did:plc:person-b",
+
"a.a.a",
+
&format!("rkey-bbb-{i}"),
+
&format!(r#"{{"n": {i}}}"#),
+
Some(&format!("rev-bbb-{i}")),
+
None,
+
11_000 + i,
+
);
+
}
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 3);
+
+
let records_deleted =
+
write.delete_account(&Did::new("did:plc:person-b".to_string()).unwrap())?;
+
assert_eq!(records_deleted, 2);
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_delete_account_removes_record() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 9_999); // queue it before the rollup
+
write.insert_batch(batch.batch)?;
+
+
write.step_rollup()?;
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_delete_live_count_step() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 10_001);
+
write.insert_batch(batch.batch)?;
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 1);
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 0);
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 9_999);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_multiple_count_batches() -> anyhow::Result<()> {
+
let (_read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aab",
+
"{}",
+
Some("rev-aab"),
+
None,
+
10_001,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 2);
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn counts_before_and_after_rollup() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
batch.create(
+
"did:plc:person-b",
+
"a.a.a",
+
"rkey-bbb",
+
"{}",
+
Some("rev-bbb"),
+
None,
+
10_001,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 11_000);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aac",
+
"{}",
+
Some("rev-aac"),
+
None,
+
12_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
// before any rollup
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// first batch rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// delete account rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// second batch rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// no more rollups left
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn get_top_collections() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
batch.create(
+
"did:plc:person-b",
+
"a.a.b",
+
"rkey-bbb",
+
"{}",
+
Some("rev-bbb"),
+
None,
+
10_001,
+
);
+
batch.create(
+
"did:plc:person-c",
+
"a.b.c",
+
"rkey-ccc",
+
"{}",
+
Some("rev-ccc"),
+
None,
+
10_002,
+
);
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa-2",
+
"{}",
+
Some("rev-aaa-2"),
+
None,
+
10_003,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 3); // 3 collections
+
+
let tops = read.get_top_collections()?;
+
assert_eq!(
+
tops,
+
TopCollections {
+
total_records: 4,
+
dids_estimate: 3,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 4,
+
dids_estimate: 3,
+
nsid_child_segments: HashMap::from([
+
(
+
"a".to_string(),
+
TopCollections {
+
total_records: 3,
+
dids_estimate: 2,
+
nsid_child_segments: HashMap::from([
+
(
+
"a".to_string(),
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
},
+
),
+
(
+
"b".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
}
+
),
+
]),
+
},
+
),
+
(
+
"b".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"c".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
},
+
),]),
+
},
+
),
+
]),
+
},
+
),]),
+
}
+
);
+
Ok(())
+
}
+
+
#[test]
+
fn get_top_collections_with_parent_nsid() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a.a",
+
"aaaa",
+
r#""child nsid""#,
+
Some("rev-aaaa"),
+
None,
+
100,
+
);
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"aaa",
+
r#""parent nsid""#,
+
Some("rev-aaa"),
+
None,
+
101,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 2); // 2 collections
+
+
let tops = read.get_top_collections()?;
+
assert_eq!(
+
tops,
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
},
+
),]),
+
},
+
),]),
+
},
+
),]),
+
},
+
),]),
+
}
+
);
+
+
// TODO: handle leaf node counts explicitly, since parent NSIDs can be leaves themselves
+
+
Ok(())
+
}
+
}
+1841
ufos/src/storage_mem.rs
···
+
use std::ops::Bound;
+
use std::sync::Arc;
+
+
use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr};
+
use crate::error::StorageError;
+
use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter};
+
use crate::store_types::{
+
AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal,
+
HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue,
+
JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey,
+
NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey,
+
RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue,
+
WeekTruncatedCursor, WeeklyRollupKey,
+
};
+
use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord};
+
use async_trait::async_trait;
+
use jetstream::events::Cursor;
+
use lsm_tree::range::prefix_to_range;
+
use std::collections::BTreeMap;
+
use std::collections::HashMap;
+
use std::path::Path;
+
use std::sync::Mutex;
+
use std::sync::RwLock;
+
use std::time::SystemTime;
+
+
const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds
+
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
+
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
+
+
///
+
/// new data format, roughly:
+
///
+
/// Partition: 'global'
+
///
+
/// - Global sequence counter (is the jetstream cursor -- monotonic with many gaps)
+
/// - key: "js_cursor" (literal)
+
/// - val: u64
+
///
+
/// - Jetstream server endpoint (persisted because the cursor can't be used on another instance without data loss)
+
/// - key: "js_endpoint" (literal)
+
/// - val: string (URL of the instance)
+
///
+
/// - Launch date
+
/// - key: "takeoff" (literal)
+
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
+
///
+
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
+
/// - key: "rollup_cursor" (literal)
+
/// - val: u64 (tracks behind js_cursor)
+
///
+
///
+
/// Partition: 'feed'
+
///
+
/// - Per-collection list of record references ordered by jetstream cursor
+
/// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor)
+
/// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.)
+
///
+
///
+
/// Partition: 'records'
+
///
+
/// - Actual records by their atproto location
+
/// - key: nullstr || nullstr || nullstr (did, collection, rkey)
+
/// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record)
+
///
+
///
+
/// Partition: 'rollups'
+
///
+
/// - Live (batched) records counts and dids estimate per collection
+
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - Hourly total record counts and dids estimate per collection
+
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - Weekly total record counts and dids estimate per collection
+
/// - key: "weekly_counts" || u64 || nullstr (hour, nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - All-time total record counts and dids estimate per collection
+
/// - key: "ever_counts" || nullstr (nsid)
+
/// - val: u64 || HLL (count (not cursor), estimator)
+
///
+
/// - TODO: sorted indexes for all-times?
+
///
+
///
+
/// Partition: 'queues'
+
///
+
/// - Delete account queue
+
/// - key: "delete_acount" || u64 (js_cursor)
+
/// - val: nullstr (did)
+
///
+
///
+
/// TODO: moderation actions
+
/// TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy fetching + caching on read.
+
#[derive(Debug)]
+
pub struct MemStorage {}
+
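The layout notes above compose two primitives: null-terminated strings for variable-length segments and fixed-width u64 cursors. A minimal sketch of the 'feed' key shape (nullstr || u64); encode_feed_key/decode_feed_key are hypothetical helpers for illustration, not the crate's actual DbBytes impls, and big-endian cursor bytes are assumed so lexicographic key order matches cursor order:

fn encode_feed_key(nsid: &str, cursor: u64) -> Vec<u8> {
    // NSIDs are ASCII with no interior NULs, so a 0 byte can terminate them
    let mut out = Vec::with_capacity(nsid.len() + 1 + 8);
    out.extend_from_slice(nsid.as_bytes());
    out.push(0);
    // big-endian (assumed) so byte order matches numeric cursor order
    out.extend_from_slice(&cursor.to_be_bytes());
    out
}

fn decode_feed_key(bytes: &[u8]) -> Option<(String, u64)> {
    let nul = bytes.iter().position(|&b| b == 0)?;
    let nsid = String::from_utf8(bytes[..nul].to_vec()).ok()?;
    let cursor = u64::from_be_bytes(bytes[nul + 1..].try_into().ok()?);
    Some((nsid, cursor))
}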
+
#[derive(Debug, Default)]
+
pub struct MemConfig {
+
/// drop the db when the storage is dropped
+
///
+
/// this is only meant for tests
+
#[cfg(test)]
+
pub temp: bool,
+
}
+
+
////////////
+
////////////
+
////////////
+
////////////
+
////////////
+
////////////
+
+
struct BatchSentinel {}
+
+
#[derive(Clone)]
+
struct MemKeyspace {
+
keyspace_guard: Arc<RwLock<BatchSentinel>>,
+
}
+
+
impl MemKeyspace {
+
pub fn open() -> Self {
+
Self {
+
keyspace_guard: Arc::new(RwLock::new(BatchSentinel {})),
+
}
+
}
+
pub fn open_partition(&self, _name: &str) -> StorageResult<MemPartion> {
+
Ok(MemPartion {
+
// name: name.to_string(),
+
keyspace_guard: self.keyspace_guard.clone(),
+
contents: Default::default(),
+
})
+
}
+
pub fn batch(&self) -> MemBatch {
+
MemBatch {
+
keyspace_guard: self.keyspace_guard.clone(),
+
tasks: Vec::new(),
+
}
+
}
+
pub fn instant(&self) -> u64 {
+
1
+
}
+
}
+
+
enum BatchTask {
+
Insert {
+
p: MemPartion,
+
key: Vec<u8>,
+
val: Vec<u8>,
+
},
+
Remove {
+
p: MemPartion,
+
key: Vec<u8>,
+
},
+
}
+
struct MemBatch {
+
keyspace_guard: Arc<RwLock<BatchSentinel>>,
+
tasks: Vec<BatchTask>,
+
}
+
impl MemBatch {
+
pub fn insert(&mut self, p: &MemPartion, key: &[u8], val: &[u8]) {
+
self.tasks.push(BatchTask::Insert {
+
p: p.clone(),
+
key: key.to_vec(),
+
val: val.to_vec(),
+
});
+
}
+
pub fn remove(&mut self, p: &MemPartion, key: &[u8]) {
+
self.tasks.push(BatchTask::Remove {
+
p: p.clone(),
+
key: key.to_vec(),
+
});
+
}
+
pub fn len(&self) -> usize {
+
self.tasks.len()
+
}
+
pub fn commit(&mut self) -> StorageResult<()> {
+
let _guard = self.keyspace_guard.write().unwrap();
+
for task in &mut self.tasks {
+
match task {
+
BatchTask::Insert { p, key, val } => p
+
.contents
+
.try_lock()
+
.unwrap()
+
.insert(key.to_vec(), val.to_vec()),
+
BatchTask::Remove { p, key } => p.contents.try_lock().unwrap().remove(key),
+
};
+
}
+
Ok(())
+
}
+
}
+
+
#[derive(Clone)]
+
struct MemPartion {
+
// name: String,
+
keyspace_guard: Arc<RwLock<BatchSentinel>>,
+
contents: Arc<Mutex<BTreeMap<Vec<u8>, Vec<u8>>>>,
+
}
+
impl MemPartion {
+
pub fn get(&self, key: &[u8]) -> StorageResult<Option<Vec<u8>>> {
+
let _guard = self.keyspace_guard.read().unwrap();
+
Ok(self.contents.lock().unwrap().get(key).cloned())
+
}
+
pub fn prefix(&self, pre: &[u8]) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> {
+
// let prefix_bytes = prefix.to_db_bytes()?;
+
let (_, Bound::Excluded(range_end)) = prefix_to_range(pre) else {
+
panic!("bad range thing");
+
};
+
+
return self.range(pre.to_vec()..range_end.to_vec());
+
}
+
pub fn range(&self, r: std::ops::Range<Vec<u8>>) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> {
+
let _guard = self.keyspace_guard.read().unwrap();
+
self.contents
+
.lock()
+
.unwrap()
+
.range(r)
+
.map(|(k, v)| Ok((k.clone(), v.clone())))
+
.collect()
+
}
+
pub fn insert(&self, key: &[u8], val: &[u8]) -> StorageResult<()> {
+
let _guard = self.keyspace_guard.read().unwrap();
+
self.contents
+
.lock()
+
.unwrap()
+
.insert(key.to_vec(), val.to_vec());
+
Ok(())
+
}
+
// pub fn remove(&self, key: &[u8]) -> StorageResult<()> {
+
// let _guard = self.keyspace_guard.read().unwrap();
+
// self.contents
+
// .lock()
+
// .unwrap()
+
// .remove(key);
+
// Ok(())
+
// }
+
// not a true snapshot: the clone shares the same Arc'd map,
+
// which is fine for tests where nothing writes while we read
+
pub fn snapshot_at(&self, _instant: u64) -> Self {
+
self.clone()
+
}
+
pub fn snapshot(&self) -> Self {
+
self.clone()
+
}
+
}
+
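A note on the BatchSentinel design above: every point read or write takes the keyspace RwLock in read mode, while MemBatch::commit takes it in write mode, so a committing batch appears atomic to concurrent readers (assuming no reader holds a guard across a commit). This mirrors, in miniature, the atomicity that the fjall keyspace batch provides in the on-disk backend.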
+
////////////
+
////////////
+
////////////
+
////////////
+
////////////
+
////////////
+
+
impl StorageWhatever<MemReader, MemWriter, MemConfig> for MemStorage {
+
fn init(
+
_path: impl AsRef<Path>,
+
endpoint: String,
+
force_endpoint: bool,
+
_config: MemConfig,
+
) -> StorageResult<(MemReader, MemWriter, Option<Cursor>)> {
+
let keyspace = MemKeyspace::open();
+
+
let global = keyspace.open_partition("global")?;
+
let feeds = keyspace.open_partition("feeds")?;
+
let records = keyspace.open_partition("records")?;
+
let rollups = keyspace.open_partition("rollups")?;
+
let queues = keyspace.open_partition("queues")?;
+
+
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
+
+
if js_cursor.is_some() {
+
let stored_endpoint =
+
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
+
+
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
+
"found cursor but missing js_endpoint, refusing to start.".to_string(),
+
))?;
+
+
if stored != endpoint {
+
if force_endpoint {
+
log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}");
+
insert_static_neu::<JetstreamEndpointKey>(
+
&global,
+
JetstreamEndpointValue(endpoint.to_string()),
+
)?;
+
} else {
+
return Err(StorageError::InitError(format!(
+
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.")));
+
}
+
}
+
} else {
+
insert_static_neu::<JetstreamEndpointKey>(
+
&global,
+
JetstreamEndpointValue(endpoint.to_string()),
+
)?;
+
insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
+
insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
+
}
+
+
let reader = MemReader {
+
keyspace: keyspace.clone(),
+
global: global.clone(),
+
feeds: feeds.clone(),
+
records: records.clone(),
+
rollups: rollups.clone(),
+
};
+
let writer = MemWriter {
+
keyspace,
+
global,
+
feeds,
+
records,
+
rollups,
+
queues,
+
};
+
Ok((reader, writer, js_cursor))
+
}
+
}
+
+
type MemRKV = StorageResult<(Vec<u8>, Vec<u8>)>;
+
+
#[derive(Clone)]
+
pub struct MemReader {
+
keyspace: MemKeyspace,
+
global: MemPartion,
+
feeds: MemPartion,
+
records: MemPartion,
+
rollups: MemPartion,
+
}
+
+
/// An iterator that knows how to skip over deleted/invalidated records
+
struct RecordIterator {
+
db_iter: Box<dyn Iterator<Item = MemRKV>>,
+
records: MemPartion,
+
limit: usize,
+
fetched: usize,
+
}
+
impl RecordIterator {
+
pub fn new(
+
feeds: &MemPartion,
+
records: MemPartion,
+
collection: &Nsid,
+
limit: usize,
+
) -> StorageResult<Self> {
+
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
+
let db_iter = feeds.prefix(&prefix).into_iter().rev();
+
Ok(Self {
+
db_iter: Box::new(db_iter),
+
records,
+
limit,
+
fetched: 0,
+
})
+
}
+
fn get_record(&self, db_next: MemRKV) -> StorageResult<Option<UFOsRecord>> {
+
let (key_bytes, val_bytes) = db_next?;
+
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
+
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
+
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
+
+
let Some(location_val_bytes) = self.records.get(&location_key.to_db_bytes()?)? else {
+
// record was deleted (hopefully)
+
return Ok(None);
+
};
+
+
let (meta, n) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
+
+
if meta.cursor() != feed_key.cursor() {
+
// older/different version
+
return Ok(None);
+
}
+
if meta.rev != feed_val.rev() {
+
// weird...
+
log::warn!("record lookup: cursor match but rev did not...? excluding.");
+
return Ok(None);
+
}
+
let Some(raw_value_bytes) = location_val_bytes.get(n..) else {
+
log::warn!(
+
"record lookup: found record but could not get bytes to decode the record??"
+
);
+
return Ok(None);
+
};
+
let rawval = db_complete::<RecordRawValue>(raw_value_bytes)?;
+
Ok(Some(UFOsRecord {
+
collection: feed_key.collection().clone(),
+
cursor: feed_key.cursor(),
+
did: feed_val.did().clone(),
+
rkey: feed_val.rkey().clone(),
+
rev: meta.rev.to_string(),
+
record: rawval.try_into()?,
+
is_update: meta.is_update,
+
}))
+
}
+
}
+
impl Iterator for RecordIterator {
+
type Item = StorageResult<Option<UFOsRecord>>;
+
fn next(&mut self) -> Option<Self::Item> {
+
if self.fetched == self.limit {
+
return Some(Ok(None));
+
}
+
let record = loop {
+
let db_next = self.db_iter.next()?; // None short-circuits here
+
match self.get_record(db_next) {
+
Err(e) => return Some(Err(e)),
+
Ok(Some(record)) => break record,
+
Ok(None) => continue,
+
}
+
};
+
self.fetched += 1;
+
Some(Ok(Some(record)))
+
}
+
}
+
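The item type here is three-state: None means the feed prefix is exhausted, Some(Ok(None)) is the limit sentinel, and deleted or stale entries are skipped internally and never surface. A minimal sketch of how a caller might drain it (assuming a StorageResult-returning context for the ? operator):

let mut out = Vec::new();
for item in iter {
    match item? {
        Some(record) => out.push(record), // a live, current record
        None => break,                    // limit reached
    }
}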
+
impl MemReader {
+
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
+
let rollup_cursor =
+
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?
+
.map(|c| c.to_raw_u64());
+
+
Ok(serde_json::json!({
+
"rollup_cursor": rollup_cursor,
+
}))
+
}
+
+
fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
+
let global = self.global.snapshot();
+
+
let endpoint =
+
get_snapshot_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?
+
.ok_or(StorageError::BadStateError(
+
"Could not find jetstream endpoint".to_string(),
+
))?
+
.0;
+
+
let started_at = get_snapshot_static_neu::<TakeoffKey, TakeoffValue>(&global)?
+
.ok_or(StorageError::BadStateError(
+
"Could not find jetstream takeoff time".to_string(),
+
))?
+
.to_raw_u64();
+
+
let latest_cursor =
+
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
+
.map(|c| c.to_raw_u64());
+
+
Ok(ConsumerInfo::Jetstream {
+
endpoint,
+
started_at,
+
latest_cursor,
+
})
+
}
+
+
fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
+
// TODO: limit nsid traversal depth
+
// TODO: limit nsid traversal breadth
+
// TODO: be serious about anything
+
+
// TODO: probably use a stack of segments to reduce to ~log-n merges
+
+
#[derive(Default)]
+
struct Blah {
+
counts: CountsValue,
+
children: HashMap<String, Blah>,
+
}
+
impl From<&Blah> for TopCollections {
+
fn from(bla: &Blah) -> Self {
+
Self {
+
total_records: bla.counts.records(),
+
dids_estimate: bla.counts.dids().estimate() as u64,
+
nsid_child_segments: HashMap::from_iter(
+
bla.children.iter().map(|(k, v)| (k.to_string(), v.into())),
+
),
+
}
+
}
+
}
+
+
let mut b = Blah::default();
+
let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?;
+
for kv in self.rollups.prefix(&prefix.to_db_bytes()?) {
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<AllTimeRollupKey>(&key_bytes)?;
+
let val = db_complete::<CountsValue>(&val_bytes)?;
+
+
let mut node = &mut b;
+
node.counts.merge(&val);
+
for segment in key.collection().split('.') {
+
node = node.children.entry(segment.to_string()).or_default();
+
node.counts.merge(&val);
+
}
+
}
+
+
Ok((&b).into())
+
}
+
+
fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
+
// 0. grab a snapshot in case rollups happen while we're working
+
let instant = self.keyspace.instant();
+
let global = self.global.snapshot_at(instant);
+
let rollups = self.rollups.snapshot_at(instant);
+
+
// 1. all-time counts
+
let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?;
+
let mut total_counts = rollups
+
.get(&all_time_key)?
+
.as_deref()
+
.map(db_complete::<CountsValue>)
+
.transpose()?
+
.unwrap_or_default();
+
+
// 2. live counts that haven't been rolled into all-time yet.
+
let rollup_cursor =
+
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or(
+
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
+
)?;
+
+
let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
+
for kv in rollups.range(full_range) {
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
+
if key.collection() == collection {
+
let counts = db_complete::<CountsValue>(&val_bytes)?;
+
total_counts.merge(&counts);
+
}
+
}
+
Ok((
+
total_counts.records(),
+
total_counts.dids().estimate() as u64,
+
))
+
}
+
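Worth noting on the merge above: record totals are plain sums, but the did estimate is an HLL union, so a did counted in both the all-time rollup and a live batch is not double-counted. The shape of that read path with toy numbers (hypothetical values, illustration only):

// all-time rollup:            100 records, ~40 distinct dids
// live, not-yet-rolled-up:      5 records,  ~3 dids (2 already seen all-time)
// merged result:              105 records, ~41 distinct dids (union, not 43)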
+
fn get_records_by_collections(
+
&self,
+
collections: &[Nsid],
+
limit: usize,
+
_expand_each_collection: bool,
+
) -> StorageResult<Vec<UFOsRecord>> {
+
if collections.is_empty() {
+
return Ok(vec![]);
+
}
+
let mut record_iterators = Vec::new();
+
for collection in collections {
+
let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?;
+
record_iterators.push(iter.peekable());
+
}
+
let mut merged = Vec::new();
+
loop {
+
let mut latest: Option<(Cursor, usize)> = None; // ugh
+
for (i, iter) in record_iterators.iter_mut().enumerate() {
+
let Some(it) = iter.peek_mut() else {
+
continue;
+
};
+
let it = match it {
+
Ok(v) => v,
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
+
};
+
let Some(rec) = it else {
+
break;
+
};
+
if let Some((cursor, _)) = latest {
+
if rec.cursor > cursor {
+
latest = Some((rec.cursor, i))
+
}
+
} else {
+
latest = Some((rec.cursor, i));
+
}
+
}
+
let Some((_, idx)) = latest else {
+
break;
+
};
+
// unwraps are safe: we just peeked Some(Ok(Some(_))) at this index
+
merged.push(record_iterators[idx].next().unwrap().unwrap().unwrap());
+
}
+
Ok(merged)
+
}
+
}
+
+
#[async_trait]
+
impl StoreReader for MemReader {
+
async fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || MemReader::get_storage_stats(&s)).await?
+
}
+
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || MemReader::get_consumer_info(&s)).await?
+
}
+
async fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
+
let s = self.clone();
+
tokio::task::spawn_blocking(move || MemReader::get_top_collections(&s)).await?
+
}
+
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
+
let s = self.clone();
+
let collection = collection.clone();
+
tokio::task::spawn_blocking(move || MemReader::get_counts_by_collection(&s, &collection))
+
.await?
+
}
+
async fn get_records_by_collections(
+
&self,
+
collections: &[Nsid],
+
limit: usize,
+
expand_each_collection: bool,
+
) -> StorageResult<Vec<UFOsRecord>> {
+
let s = self.clone();
+
let collections = collections.to_vec();
+
tokio::task::spawn_blocking(move || {
+
MemReader::get_records_by_collections(&s, &collections, limit, expand_each_collection)
+
})
+
.await?
+
}
+
}
+
+
pub struct MemWriter {
+
keyspace: MemKeyspace,
+
global: MemPartion,
+
feeds: MemPartion,
+
records: MemPartion,
+
rollups: MemPartion,
+
queues: MemPartion,
+
}
+
+
impl MemWriter {
+
fn rollup_delete_account(
+
&mut self,
+
cursor: Cursor,
+
key_bytes: &[u8],
+
val_bytes: &[u8],
+
) -> StorageResult<usize> {
+
let did = db_complete::<DeleteAccountQueueVal>(val_bytes)?;
+
self.delete_account(&did)?;
+
let mut batch = self.keyspace.batch();
+
batch.remove(&self.queues, key_bytes);
+
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, cursor)?;
+
batch.commit()?;
+
Ok(1)
+
}
+
+
fn rollup_live_counts(
+
&mut self,
+
timelies: impl Iterator<Item = Result<(Vec<u8>, Vec<u8>), StorageError>>,
+
cursor_exclusive_limit: Option<Cursor>,
+
rollup_limit: usize,
+
) -> StorageResult<usize> {
+
// current strategy is to buffer counts in mem before writing the rollups
+
// we *could* read+write every single batch to rollup.. but their merge is associative so
+
// ...so save the db some work up front? is this worth it? who knows...
+
+
log::warn!("sup!!!");
+
+
#[derive(Eq, Hash, PartialEq)]
+
enum Rollup {
+
Hourly(HourTruncatedCursor),
+
Weekly(WeekTruncatedCursor),
+
AllTime,
+
}
+
+
let mut batch = self.keyspace.batch();
+
let mut cursors_advanced = 0;
+
let mut last_cursor = Cursor::from_start();
+
let mut counts_by_rollup: HashMap<(Nsid, Rollup), CountsValue> = HashMap::new();
+
+
log::warn!("about to loop....");
+
for (i, kv) in timelies.enumerate() {
+
log::warn!("loop {i} {kv:?}...");
+
if i >= rollup_limit {
+
break;
+
}
+
+
let (key_bytes, val_bytes) = kv?;
+
let key = db_complete::<LiveCountsKey>(&key_bytes)
+
.inspect_err(|e| log::warn!("rlc: key: {e:?}"))?;
+
+
if cursor_exclusive_limit
+
.map(|limit| key.cursor() > limit)
+
.unwrap_or(false)
+
{
+
break;
+
}
+
+
batch.remove(&self.rollups, &key_bytes);
+
let val = db_complete::<CountsValue>(&val_bytes)
+
.inspect_err(|e| log::warn!("rlc: val: {e:?}"))?;
+
counts_by_rollup
+
.entry((
+
key.collection().clone(),
+
Rollup::Hourly(key.cursor().into()),
+
))
+
.or_default()
+
.merge(&val);
+
counts_by_rollup
+
.entry((
+
key.collection().clone(),
+
Rollup::Weekly(key.cursor().into()),
+
))
+
.or_default()
+
.merge(&val);
+
counts_by_rollup
+
.entry((key.collection().clone(), Rollup::AllTime))
+
.or_default()
+
.merge(&val);
+
+
cursors_advanced += 1;
+
last_cursor = key.cursor();
+
}
+
log::warn!("done looping. looping cbr counts(?)..");
+
+
for ((nsid, rollup), counts) in counts_by_rollup {
+
log::warn!(
+
"######################## cbr loop {nsid:?} {counts:?} ########################"
+
);
+
let key_bytes = match rollup {
+
Rollup::Hourly(hourly_cursor) => {
+
let k = HourlyRollupKey::new(hourly_cursor, &nsid);
+
log::info!("hrly k: {k:?}");
+
k.to_db_bytes()?
+
}
+
Rollup::Weekly(weekly_cursor) => {
+
let k = WeeklyRollupKey::new(weekly_cursor, &nsid);
+
log::info!("weekly k: {k:?}");
+
k.to_db_bytes()?
+
}
+
Rollup::AllTime => {
+
let k = AllTimeRollupKey::new(&nsid);
+
log::info!("alltime k: {k:?}");
+
k.to_db_bytes()?
+
}
+
};
+
// log::info!("key bytes: {key_bytes:?}");
+
let mut rolled: CountsValue = self
+
.rollups
+
.get(&key_bytes)?
+
.inspect(|v| {
+
let lax = CountsValue::from_db_bytes(v);
+
log::info!(
+
"val: len={}, lax={lax:?} first32={:?}",
+
v.len(),
+
v.get(..32)
+
);
+
})
+
.as_deref()
+
.map(db_complete::<CountsValue>)
+
.transpose()
+
.inspect_err(|e| log::warn!("oooh did we break on the rolled thing? {e:?}"))?
+
.unwrap_or_default();
+
+
// try to round-trip before inserting, for funsies
+
let tripppin = counts.to_db_bytes()?;
+
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
+
assert_eq!(n, tripppin.len());
+
assert_eq!(counts.prefix, and_back.prefix);
+
assert_eq!(counts.dids().estimate(), and_back.dids().estimate());
+
if counts.records() > 20000000 {
+
panic!("COUNTS maybe wtf? {counts:?}")
+
}
+
// assert_eq!(rolled, and_back);
+
+
rolled.merge(&counts);
+
+
// try to round-trip before inserting, for funsies
+
let tripppin = rolled.to_db_bytes()?;
+
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
+
assert_eq!(n, tripppin.len());
+
assert_eq!(rolled.prefix, and_back.prefix);
+
assert_eq!(rolled.dids().estimate(), and_back.dids().estimate());
+
if rolled.records() > 20000000 {
+
panic!("maybe wtf? {rolled:?}")
+
}
+
// assert_eq!(rolled, and_back);
+
+
batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?);
+
}
+
+
log::warn!("done cbr loop.");
+
+
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)
+
.inspect_err(|e| log::warn!("insert neu: {e:?}"))?;
+
+
batch.commit()?;
+
+
log::warn!("ok finished rlc stuff. huh.");
+
Ok(cursors_advanced)
+
}
+
}
+
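The buffering in rollup_live_counts above leans on the merge being associative: record totals add and the did-estimators union, so pre-folding a whole batch in memory and writing each rollup key once gives the same result as merging entry-by-entry against the db. A toy demonstration with a plain counter standing in for the real CountsValue (hypothetical type, illustration only):

#[derive(Clone, Copy, Default, PartialEq, Debug)]
struct Counts {
    records: u64, // the real value also carries an HLL did-estimator
}

impl Counts {
    fn merge(&mut self, other: &Counts) {
        self.records += other.records;
    }
}

fn main() {
    let live = [Counts { records: 3 }, Counts { records: 5 }, Counts { records: 2 }];

    // merge each live batch straight into the stored rollup value...
    let mut one_by_one = Counts::default();
    for c in &live {
        one_by_one.merge(c);
    }

    // ...or pre-fold in memory and apply a single merge: same result.
    let mut buffered = Counts::default();
    for c in &live {
        buffered.merge(c);
    }
    let mut once = Counts::default();
    once.merge(&buffered);

    assert_eq!(one_by_one, once);
}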
+
impl StoreWriter for MemWriter {
+
fn insert_batch<const LIMIT: usize>(
+
&mut self,
+
event_batch: EventBatch<LIMIT>,
+
) -> StorageResult<()> {
+
if event_batch.is_empty() {
+
return Ok(());
+
}
+
+
let mut batch = self.keyspace.batch();
+
+
// would be nice not to have to iterate everything at once here
+
let latest = event_batch.latest_cursor().unwrap();
+
+
for (nsid, commits) in event_batch.commits_by_nsid {
+
for commit in commits.commits {
+
let location_key: RecordLocationKey = (&commit, &nsid).into();
+
+
match commit.action {
+
CommitAction::Cut => {
+
batch.remove(&self.records, &location_key.to_db_bytes()?);
+
}
+
CommitAction::Put(put_action) => {
+
let feed_key = NsidRecordFeedKey::from_pair(nsid.clone(), commit.cursor);
+
let feed_val: NsidRecordFeedVal =
+
(&commit.did, &commit.rkey, commit.rev.as_str()).into();
+
batch.insert(
+
&self.feeds,
+
&feed_key.to_db_bytes()?,
+
&feed_val.to_db_bytes()?,
+
);
+
+
let location_val: RecordLocationVal =
+
(commit.cursor, commit.rev.as_str(), put_action).into();
+
batch.insert(
+
&self.records,
+
&location_key.to_db_bytes()?,
+
&location_val.to_db_bytes()?,
+
);
+
}
+
}
+
}
+
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
+
let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate);
+
batch.insert(
+
&self.rollups,
+
&live_counts_key.to_db_bytes()?,
+
&counts_value.to_db_bytes()?,
+
);
+
}
+
+
for remove in event_batch.account_removes {
+
let queue_key = DeleteAccountQueueKey::new(remove.cursor);
+
let queue_val: DeleteAccountQueueVal = remove.did;
+
batch.insert(
+
&self.queues,
+
&queue_key.to_db_bytes()?,
+
&queue_val.to_db_bytes()?,
+
);
+
}
+
+
batch.insert(
+
&self.global,
+
&DbStaticStr::<JetstreamCursorKey>::default().to_db_bytes()?,
+
&latest.to_db_bytes()?,
+
);
+
+
batch.commit()?;
+
Ok(())
+
}
+
+
fn step_rollup(&mut self) -> StorageResult<usize> {
+
let rollup_cursor =
+
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?
+
.ok_or(StorageError::BadStateError(
+
"Could not find current rollup cursor".to_string(),
+
))
+
.inspect_err(|e| log::warn!("failed getting rollup cursor: {e:?}"))?;
+
+
// timelies
+
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)
+
.inspect_err(|e| log::warn!("live counts range: {e:?}"))?;
+
let mut timely_iter = self.rollups.range(live_counts_range).into_iter().peekable();
+
+
let timely_next_cursor = timely_iter
+
.peek_mut()
+
.map(|kv| -> StorageResult<Cursor> {
+
match kv {
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
+
Ok((key_bytes, _)) => {
+
let key = db_complete::<LiveCountsKey>(key_bytes).inspect_err(|e| {
+
log::warn!("failed getting key for next timely: {e:?}")
+
})?;
+
Ok(key.cursor())
+
}
+
}
+
})
+
.transpose()
+
.inspect_err(|e| log::warn!("something about timely: {e:?}"))?;
+
+
// delete accounts
+
let delete_accounts_range =
+
DeleteAccountQueueKey::new(rollup_cursor).range_to_prefix_end()?;
+
+
let next_delete = self
+
.queues
+
.range(delete_accounts_range)
+
.into_iter()
+
.next()
+
.transpose()
+
.inspect_err(|e| log::warn!("range for next delete: {e:?}"))?
+
.map(|(key_bytes, val_bytes)| {
+
db_complete::<DeleteAccountQueueKey>(&key_bytes)
+
.inspect_err(|e| log::warn!("failed inside next delete thing????: {e:?}"))
+
.map(|k| (k.suffix, key_bytes, val_bytes))
+
})
+
.transpose()
+
.inspect_err(|e| log::warn!("failed getting next delete: {e:?}"))?;
+
+
let cursors_stepped = match (timely_next_cursor, next_delete) {
+
(
+
Some(timely_next_cursor),
+
Some((delete_cursor, delete_key_bytes, delete_val_bytes)),
+
) => {
+
if timely_next_cursor < delete_cursor {
+
self.rollup_live_counts(
+
timely_iter,
+
Some(delete_cursor),
+
MAX_BATCHED_ROLLUP_COUNTS,
+
)
+
.inspect_err(|e| log::warn!("rolling up live counts: {e:?}"))?
+
} else {
+
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)
+
.inspect_err(|e| log::warn!("deleting acocunt: {e:?}"))?
+
}
+
}
+
(Some(_), None) => self
+
.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)
+
.inspect_err(|e| log::warn!("rolling up (lasjdflkajs): {e:?}"))?,
+
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => self
+
.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)
+
.inspect_err(|e| log::warn!("deleting acocunt other branch: {e:?}"))?,
+
(None, None) => 0,
+
};
+
+
Ok(cursors_stepped)
+
}
+
+
fn trim_collection(
+
&mut self,
+
collection: &Nsid,
+
limit: usize,
+
// TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end)
+
) -> StorageResult<()> {
+
let mut dangling_feed_keys_cleaned = 0;
+
let mut records_deleted = 0;
+
+
let mut batch = self.keyspace.batch();
+
+
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
+
let mut found = 0;
+
for kv in self.feeds.prefix(&prefix).into_iter().rev() {
+
let (key_bytes, val_bytes) = kv?;
+
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
+
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
+
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
+
let location_key_bytes = location_key.to_db_bytes()?;
+
+
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
+
// record was deleted (hopefully)
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
};
+
+
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
+
+
if meta.cursor() != feed_key.cursor() {
+
// older/different version
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
}
+
if meta.rev != feed_val.rev() {
+
// weird...
+
log::warn!("record lookup: cursor match but rev did not...? removing.");
+
batch.remove(&self.feeds, &location_key_bytes);
+
dangling_feed_keys_cleaned += 1;
+
continue;
+
}
+
+
if batch.len() >= MAX_BATCHED_CLEANUP_SIZE {
+
batch.commit()?;
+
batch = self.keyspace.batch();
+
}
+
+
found += 1;
+
if found <= limit {
+
continue;
+
}
+
+
batch.remove(&self.feeds, &location_key_bytes);
+
batch.remove(&self.records, &location_key_bytes);
+
records_deleted += 1;
+
}
+
+
batch.commit()?;
+
+
log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records");
+
Ok(())
+
}
+
+
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
+
let mut records_deleted = 0;
+
let mut batch = self.keyspace.batch();
+
let prefix = RecordLocationKey::from_prefix_to_db_bytes(did)?;
+
for kv in self.records.prefix(&prefix) {
+
let (key_bytes, _) = kv?;
+
batch.remove(&self.records, &key_bytes);
+
records_deleted += 1;
+
if batch.len() >= MAX_BATCHED_ACCOUNT_DELETE_RECORDS {
+
batch.commit()?;
+
batch = self.keyspace.batch();
+
}
+
}
+
batch.commit()?;
+
Ok(records_deleted)
+
}
+
}
+
+
/// Get a value from a fixed key
+
fn get_static_neu<K: StaticStr, V: DbBytes>(global: &MemPartion) -> StorageResult<Option<V>> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value = global
+
.get(&key_bytes)?
+
.map(|value_bytes| db_complete(&value_bytes))
+
.transpose()?;
+
Ok(value)
+
}
+
+
/// Get a value from a fixed key
+
fn get_snapshot_static_neu<K: StaticStr, V: DbBytes>(
+
global: &MemPartion,
+
) -> StorageResult<Option<V>> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value = global
+
.get(&key_bytes)?
+
.map(|value_bytes| db_complete(&value_bytes))
+
.transpose()?;
+
Ok(value)
+
}
+
+
/// Set a value to a fixed key
+
fn insert_static_neu<K: StaticStr>(global: &MemPartion, value: impl DbBytes) -> StorageResult<()> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value_bytes = value.to_db_bytes()?;
+
global.insert(&key_bytes, &value_bytes)?;
+
Ok(())
+
}
+
+
/// Set a value to a fixed key
+
fn insert_batch_static_neu<K: StaticStr>(
+
batch: &mut MemBatch,
+
global: &MemPartion,
+
value: impl DbBytes,
+
) -> StorageResult<()> {
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
+
let value_bytes = value.to_db_bytes()?;
+
batch.insert(global, &key_bytes, &value_bytes);
+
Ok(())
+
}
+
+
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
+
pub struct StorageInfo {
+
pub keyspace_disk_space: u64,
+
pub keyspace_journal_count: usize,
+
pub keyspace_sequence: u64,
+
pub global_approximate_len: usize,
+
}
+
+
#[cfg(test)]
+
mod tests {
+
use super::*;
+
use crate::{DeleteAccount, RecordKey, UFOsCommit};
+
use jetstream::events::{CommitEvent, CommitOp};
+
use jetstream::exports::Cid;
+
use serde_json::value::RawValue;
+
+
fn fjall_db() -> (MemReader, MemWriter) {
+
let (read, write, _) = MemStorage::init(
+
tempfile::tempdir().unwrap(),
+
"offline test (no real jetstream endpoint)".to_string(),
+
false,
+
MemConfig { temp: true },
+
)
+
.unwrap();
+
(read, write)
+
}
+
+
const TEST_BATCH_LIMIT: usize = 16;
+
+
#[derive(Debug, Default)]
+
struct TestBatch {
+
pub batch: EventBatch<TEST_BATCH_LIMIT>,
+
}
+
+
impl TestBatch {
+
#[allow(clippy::too_many_arguments)]
+
pub fn create(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
record: &str,
+
rev: Option<&str>,
+
cid: Option<Cid>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let record = RawValue::from_string(record.to_string()).unwrap();
+
let cid = cid.unwrap_or(
+
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
+
.parse()
+
.unwrap(),
+
);
+
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Create,
+
record: Some(record),
+
cid: Some(cid),
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
+
.unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
#[allow(clippy::too_many_arguments)]
+
pub fn update(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
record: &str,
+
rev: Option<&str>,
+
cid: Option<Cid>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let record = RawValue::from_string(record.to_string()).unwrap();
+
let cid = cid.unwrap_or(
+
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
+
.parse()
+
.unwrap(),
+
);
+
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Update,
+
record: Some(record),
+
cid: Some(cid),
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
+
.unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
#[allow(clippy::too_many_arguments)]
+
pub fn delete(
+
&mut self,
+
did: &str,
+
collection: &str,
+
rkey: &str,
+
rev: Option<&str>,
+
cursor: u64,
+
) -> Nsid {
+
let did = Did::new(did.to_string()).unwrap();
+
let collection = Nsid::new(collection.to_string()).unwrap();
+
let event = CommitEvent {
+
collection,
+
rkey: RecordKey::new(rkey.to_string()).unwrap(),
+
rev: rev.unwrap_or("asdf").to_string(),
+
operation: CommitOp::Delete,
+
record: None,
+
cid: None,
+
};
+
+
let (commit, collection) =
+
UFOsCommit::from_commit_info(event, did, Cursor::from_raw_u64(cursor)).unwrap();
+
+
self.batch
+
.commits_by_nsid
+
.entry(collection.clone())
+
.or_default()
+
.truncating_insert(commit)
+
.unwrap();
+
+
collection
+
}
+
pub fn delete_account(&mut self, did: &str, cursor: u64) -> Did {
+
let did = Did::new(did.to_string()).unwrap();
+
self.batch.account_removes.push(DeleteAccount {
+
did: did.clone(),
+
cursor: Cursor::from_raw_u64(cursor),
+
});
+
did
+
}
+
}
+
+
#[test]
+
fn test_hello() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?;
+
assert_eq!(records, 0);
+
assert_eq!(dids, 0);
+
Ok(())
+
}
+
+
#[test]
+
fn test_insert_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"asdf",
+
"{}",
+
Some("rev-z"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?;
+
assert_eq!(records, 0);
+
assert_eq!(dids, 0);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 1);
+
let rec = &records[0];
+
assert_eq!(rec.record.get(), "{}");
+
assert!(!rec.is_update);
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_get_multi_collection() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"aaa",
+
r#""earliest""#,
+
Some("rev-a"),
+
None,
+
100,
+
);
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.b",
+
"aab",
+
r#""in between""#,
+
Some("rev-ab"),
+
None,
+
101,
+
);
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"aaa-2",
+
r#""last""#,
+
Some("rev-a-2"),
+
None,
+
102,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[
+
Nsid::new("a.a.a".to_string()).unwrap(),
+
Nsid::new("a.a.b".to_string()).unwrap(),
+
Nsid::new("a.a.c".to_string()).unwrap(),
+
],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 3);
+
assert_eq!(records[0].record.get(), r#""last""#);
+
assert_eq!(
+
records[0].collection,
+
Nsid::new("a.a.a".to_string()).unwrap()
+
);
+
assert_eq!(records[1].record.get(), r#""in between""#);
+
assert_eq!(
+
records[1].collection,
+
Nsid::new("a.a.b".to_string()).unwrap()
+
);
+
assert_eq!(records[2].record.get(), r#""earliest""#);
+
assert_eq!(
+
records[2].collection,
+
Nsid::new("a.a.a".to_string()).unwrap()
+
);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_update_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
"{}",
+
Some("rev-a"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.update(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
r#"{"ch": "ch-ch-ch-changes"}"#,
+
Some("rev-z"),
+
None,
+
101,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 1);
+
let rec = &records[0];
+
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
+
assert!(rec.is_update);
+
Ok(())
+
}
+
+
#[test]
+
fn test_delete_one() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
let collection = batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
"{}",
+
Some("rev-a"),
+
None,
+
100,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.b.c",
+
"rkey-asdf",
+
Some("rev-z"),
+
101,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let (records, dids) = read.get_counts_by_collection(&collection)?;
+
assert_eq!(records, 1);
+
assert_eq!(dids, 1);
+
+
let records = read.get_records_by_collections(&[collection], 2, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_collection_trim() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
let mut last_b_cursor;
+
for i in 1..=10 {
+
last_b_cursor = 11_000 + i;
+
batch.create(
+
&format!("did:plc:inze6wrmsm7pjl7yta3oig7{}", i % 3),
+
"a.a.b",
+
&format!("rkey-bbb-{i}"),
+
&format!(r#"{{"n": {i}}}"#),
+
Some(&format!("rev-bbb-{i}")),
+
None,
+
last_b_cursor,
+
);
+
}
+
batch.create(
+
"did:plc:inze6wrmsm7pjl7yta3oig77",
+
"a.a.c",
+
"rkey-ccc",
+
"{}",
+
Some("rev-ccc"),
+
None,
+
12_000,
+
);
+
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.b".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 10);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.c".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.d".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 0);
+
+
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?;
+
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.b".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 6);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.c".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.d".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn test_delete_account() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
for i in 1..=2 {
+
batch.create(
+
"did:plc:person-b",
+
"a.a.a",
+
&format!("rkey-bbb-{i}"),
+
&format!(r#"{{"n": {i}}}"#),
+
Some(&format!("rev-bbb-{i}")),
+
None,
+
11_000 + i,
+
);
+
}
+
write.insert_batch(batch.batch)?;
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 3);
+
+
let records_deleted =
+
write.delete_account(&Did::new("did:plc:person-b".to_string()).unwrap())?;
+
assert_eq!(records_deleted, 2);
+
+
let records = read.get_records_by_collections(
+
&[Nsid::new("a.a.a".to_string()).unwrap()],
+
100,
+
false,
+
)?;
+
assert_eq!(records.len(), 1);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_delete_account_removes_record() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 9_999); // queue it before the rollup
+
write.insert_batch(batch.batch)?;
+
+
write.step_rollup()?;
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_delete_live_count_step() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 10_001);
+
write.insert_batch(batch.batch)?;
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 1);
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let records =
+
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
+
assert_eq!(records.len(), 0);
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 9_999);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn rollup_multiple_count_batches() -> anyhow::Result<()> {
+
let (_read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aab",
+
"{}",
+
Some("rev-aab"),
+
None,
+
10_001,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 2);
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn counts_before_and_after_rollup() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
batch.create(
+
"did:plc:person-b",
+
"a.a.a",
+
"rkey-bbb",
+
"{}",
+
Some("rev-bbb"),
+
None,
+
10_001,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.delete_account("did:plc:person-a", 11_000);
+
write.insert_batch(batch.batch)?;
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aac",
+
"{}",
+
Some("rev-aac"),
+
None,
+
12_000,
+
);
+
write.insert_batch(batch.batch)?;
+
+
// before any rollup
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// first batch rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// delete account rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// second batch rolled up
+
let n = write.step_rollup()?;
+
assert_eq!(n, 1);
+
+
let (records, dids) =
+
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
+
assert_eq!(records, 3);
+
assert_eq!(dids, 2);
+
+
// no more rollups left
+
let n = write.step_rollup()?;
+
assert_eq!(n, 0);
+
+
Ok(())
+
}
+
+
#[test]
+
fn get_top_collections() -> anyhow::Result<()> {
+
let (read, mut write) = fjall_db();
+
+
let mut batch = TestBatch::default();
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa",
+
"{}",
+
Some("rev-aaa"),
+
None,
+
10_000,
+
);
+
batch.create(
+
"did:plc:person-b",
+
"a.a.b",
+
"rkey-bbb",
+
"{}",
+
Some("rev-bbb"),
+
None,
+
10_001,
+
);
+
batch.create(
+
"did:plc:person-c",
+
"a.b.c",
+
"rkey-ccc",
+
"{}",
+
Some("rev-ccc"),
+
None,
+
10_002,
+
);
+
batch.create(
+
"did:plc:person-a",
+
"a.a.a",
+
"rkey-aaa-2",
+
"{}",
+
Some("rev-aaa-2"),
+
None,
+
10_003,
+
);
+
write.insert_batch(batch.batch)?;
+
+
let n = write.step_rollup()?;
+
assert_eq!(n, 3); // 3 collections
+
+
let tops = read.get_top_collections()?;
+
assert_eq!(
+
tops,
+
TopCollections {
+
total_records: 4,
+
dids_estimate: 3,
+
nsid_child_segments: HashMap::from([(
+
"a".to_string(),
+
TopCollections {
+
total_records: 4,
+
dids_estimate: 3,
+
nsid_child_segments: HashMap::from([
+
(
+
"a".to_string(),
+
TopCollections {
+
total_records: 3,
+
dids_estimate: 2,
+
nsid_child_segments: HashMap::from([
+
(
+
"a".to_string(),
+
TopCollections {
+
total_records: 2,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
},
+
),
+
(
+
"b".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
}
+
),
+
]),
+
},
+
),
+
(
+
"b".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([(
+
"c".to_string(),
+
TopCollections {
+
total_records: 1,
+
dids_estimate: 1,
+
nsid_child_segments: HashMap::from([]),
+
},
+
),]),
+
},
+
),
+
]),
+
},
+
),]),
+
}
+
);
+
Ok(())
+
}
+
}
-802
ufos/src/store.rs
···
-
use crate::db_types::{db_complete, DbBytes, DbStaticStr, EncodingError, StaticStr};
-
use crate::store_types::{
-
ByCollectionKey, ByCollectionValue, ByCursorSeenKey, ByCursorSeenValue, ByIdKey, ByIdValue,
-
JetstreamCursorKey, JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue,
-
ModCursorKey, ModCursorValue, ModQueueItemKey, ModQueueItemStringValue, ModQueueItemValue,
-
RollupCursorKey, RollupCursorValue, SeenCounter,
-
};
-
use crate::{
-
CollectionSamples, CreateRecord, DeleteAccount, Did, EventBatch, ModifyRecord, Nsid, RecordKey,
-
};
-
use fjall::{
-
Batch as FjallBatch, CompressionType, Config, Keyspace, PartitionCreateOptions, PartitionHandle,
-
};
-
use jetstream::events::Cursor;
-
use std::collections::HashMap;
-
use std::path::{Path, PathBuf};
-
use std::time::{Duration, Instant};
-
use tokio::sync::mpsc::Receiver;
-
use tokio::time::{interval_at, sleep};
-
-
/// Commit the RW batch immediately if this number of events have been read off the mod queue
-
const MAX_BATCHED_RW_EVENTS: usize = 18;
-
-
/// Commit the RW batch immediately if this number of records is reached
-
///
-
/// there are probably some efficiency gains for higher, at cost of more memory.
-
/// interestingly, this kind of sets a priority weight for the RW loop:
-
/// - doing more work whenever scheduled means getting more CPU time in general
-
///
-
/// this is higher than [MAX_BATCHED_RW_EVENTS] because account-deletes can have lots of items
-
const MAX_BATCHED_RW_ITEMS: usize = 24;
-
-
#[derive(Clone)]
-
struct Db {
-
keyspace: Keyspace,
-
partition: PartitionHandle,
-
}
-
-
/**
-
* data format, roughly:
-
*
-
* Global Meta:
-
* ["js_cursor"] => js_cursor(u64), // used as global sequence
-
* ["js_endpoint"] => &str, // checked on startup because jetstream instance cursors are not interchangeable
-
* ["mod_cursor"] => js_cursor(u64);
-
* ["rollup_cursor"] => [js_cursor|collection]; // how far the rollup helper has progressed
-
* Mod queue
-
* ["mod_queue"|js_cursor] => one of {
-
* DeleteAccount(did) // delete all account content older than cursor
-
* DeleteRecord(did, collection, rkey) // delete record older than cursor
-
* UpdateRecord(did, collection, rkey, new_record) // delete + put, but don't delete if cursor is newer
-
* }
-
* Collection and rollup meta:
-
* ["seen_by_js_cursor_collection"|js_cursor|collection] => u64 // batched total, gets cleaned up by rollup
-
* ["total_by_collection"|collection] => [u64, js_cursor] // rollup; live total requires scanning seen_by_collection after js_cursor
-
* ["hour_by_collection"|hour(u64)|collection] => u64 // rollup from seen_by_js_cursor_collection
-
* Samples:
-
* ["by_collection"|collection|js_cursor] => [did|rkey|record]
-
* ["by_id"|did|collection|rkey|js_cursor] => [] // required to support deletes; did first prefix for account deletes.
-
*
-
* TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy
-
* fetching + caching on read.
-
**/
-
#[derive(Clone)]
-
pub struct Storage {
-
/// horrible: gate all db access behind this to force global serialization to avoid deadlock
-
db: Db,
-
}
-
-
impl Storage {
-
fn init_self(path: impl AsRef<Path>) -> anyhow::Result<Self> {
-
let keyspace = Config::new(path).fsync_ms(Some(4_000)).open()?;
-
let partition = keyspace.open_partition(
-
"default",
-
PartitionCreateOptions::default().compression(CompressionType::None),
-
)?;
-
Ok(Self {
-
db: Db {
-
keyspace,
-
partition,
-
},
-
})
-
}
-
-
pub async fn open(
-
path: PathBuf,
-
endpoint: &str,
-
force_endpoint: bool,
-
) -> anyhow::Result<(Self, Option<Cursor>)> {
-
let me = tokio::task::spawn_blocking(move || Storage::init_self(path)).await??;
-
-
let js_cursor = me.get_jetstream_cursor().await?;
-
-
if js_cursor.is_some() {
-
let Some(JetstreamEndpointValue(stored)) = me.get_jetstream_endpoint().await? else {
-
anyhow::bail!("found cursor but missing js_endpoint, refusing to start.");
-
};
-
if stored != endpoint {
-
if force_endpoint {
-
log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}");
-
me.set_jetstream_endpoint(endpoint).await?;
-
} else {
-
anyhow::bail!("stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.");
-
}
-
}
-
} else {
-
me.set_jetstream_endpoint(endpoint).await?;
-
}
-
-
Ok((me, js_cursor))
-
}
-
-
/// Jetstream event batch receiver: writes without any reads
-
///
-
/// Events that require reads like record updates or deletes are written to a queue
-
pub async fn receive(&self, mut receiver: Receiver<EventBatch>) -> anyhow::Result<()> {
-
// TODO: see rw_loop: enforce single-thread.
-
loop {
-
let t_sleep = Instant::now();
-
sleep(Duration::from_secs_f64(0.8)).await; // TODO: minimize during replay
-
let slept_for = t_sleep.elapsed();
-
let queue_size = receiver.len();
-
-
if let Some(event_batch) = receiver.recv().await {
-
log::trace!("write: received write batch");
-
let batch_summary = summarize_batch(&event_batch);
-
-
let last = event_batch.last_jetstream_cursor.clone(); // TODO: get this from the data. track last in consumer. compute or track first.
-
-
let db = &self.db;
-
let keyspace = db.keyspace.clone();
-
let partition = db.partition.clone();
-
-
let writer_t0 = Instant::now();
-
log::trace!("spawn_blocking for write batch");
-
tokio::task::spawn_blocking(move || {
-
DBWriter {
-
keyspace,
-
partition,
-
}
-
.write_batch(event_batch, last)
-
})
-
.await??;
-
log::trace!("write: back from blocking task, successfully wrote batch");
-
let wrote_for = writer_t0.elapsed();
-
-
println!("{batch_summary}, slept {slept_for: <12?}, wrote {wrote_for: <11?}, queue: {queue_size}");
-
} else {
-
log::error!("store consumer: receive channel failed (dropped/closed?)");
-
anyhow::bail!("receive channel closed");
-
}
-
}
-
}
-
-
/// Read-write loop reads from the queue for record-modifying events and does rollups
-
pub async fn rw_loop(&self) -> anyhow::Result<()> {
-
// TODO: lock so that only one rw loop can possibly be run. or even better, take a mutable resource thing to enforce at compile time.
-
-
let now = tokio::time::Instant::now();
-
let mut time_to_update_events = interval_at(now, Duration::from_secs_f64(0.051));
-
let mut time_to_trim_surplus = interval_at(
-
now + Duration::from_secs_f64(1.0),
-
Duration::from_secs_f64(3.3),
-
);
-
let mut time_to_roll_up = interval_at(
-
now + Duration::from_secs_f64(0.4),
-
Duration::from_secs_f64(0.9),
-
);
-
-
time_to_update_events.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
-
time_to_trim_surplus.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
-
time_to_roll_up.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
-
-
loop {
-
let keyspace = self.db.keyspace.clone();
-
let partition = self.db.partition.clone();
-
tokio::select! {
-
_ = time_to_update_events.tick() => {
-
log::debug!("beginning event update task");
-
tokio::task::spawn_blocking(move || Self::update_events(keyspace, partition)).await??;
-
log::debug!("finished event update task");
-
}
-
_ = time_to_trim_surplus.tick() => {
-
log::debug!("beginning record trim task");
-
tokio::task::spawn_blocking(move || Self::trim_old_events(keyspace, partition)).await??;
-
log::debug!("finished record trim task");
-
}
-
_ = time_to_roll_up.tick() => {
-
log::debug!("beginning rollup task");
-
tokio::task::spawn_blocking(move || Self::roll_up_counts(keyspace, partition)).await??;
-
log::debug!("finished rollup task");
-
},
-
}
-
}
-
}
-
-
fn update_events(keyspace: Keyspace, partition: PartitionHandle) -> anyhow::Result<()> {
-
// TODO: lock this to prevent concurrent rw
-
-
log::trace!("rw: getting rw cursor...");
-
let mod_cursor =
-
get_static::<ModCursorKey, ModCursorValue>(&partition)?.unwrap_or(Cursor::from_start());
-
let range = ModQueueItemKey::new(mod_cursor.clone()).range_to_prefix_end()?;
-
-
let mut db_batch = keyspace.batch();
-
let mut batched_rw_items = 0;
-
let mut any_tasks_found = false;
-
-
log::trace!("rw: iterating newer rw items...");
-
-
for (i, pair) in partition.range(range.clone()).enumerate() {
-
log::trace!("rw: iterating {i}");
-
any_tasks_found = true;
-
-
if i >= MAX_BATCHED_RW_EVENTS {
-
break;
-
}
-
-
let (key_bytes, val_bytes) = pair?;
-
let mod_key = match db_complete::<ModQueueItemKey>(&key_bytes) {
-
Ok(k) => k,
-
Err(EncodingError::WrongStaticPrefix(_, _)) => {
-
panic!("wsp: mod queue empty.");
-
}
-
otherwise => otherwise?,
-
};
-
-
let mod_value: ModQueueItemValue =
-
db_complete::<ModQueueItemStringValue>(&val_bytes)?.try_into()?;
-
-
log::trace!("rw: iterating {i}: sending to batcher {mod_key:?} => {mod_value:?}");
-
batched_rw_items += DBWriter {
-
keyspace: keyspace.clone(),
-
partition: partition.clone(),
-
}
-
.write_rw(&mut db_batch, mod_key, mod_value)?;
-
log::trace!("rw: iterating {i}: back from batcher.");
-
-
if batched_rw_items >= MAX_BATCHED_RW_ITEMS {
-
log::trace!("rw: iterating {i}: batch big enough, breaking out.");
-
break;
-
}
-
}
-
-
if !any_tasks_found {
-
log::trace!("rw: skipping batch commit since apparently no items were added (this is normal, skipping is new)");
-
// TODO: is this missing a chance to update the cursor?
-
return Ok(());
-
}
-
-
log::info!("rw: committing rw batch with {batched_rw_items} items (items != total inserts/deletes)...");
-
let r = db_batch.commit();
-
log::info!("rw: commit result: {r:?}");
-
r?;
-
Ok(())
-
}
-
-
fn trim_old_events(_keyspace: Keyspace, _partition: PartitionHandle) -> anyhow::Result<()> {
-
// we *could* keep a collection dirty list in memory to reduce the amount of searching here
-
// actually can we use seen_by_js_cursor_collection??
-
// * ["seen_by_js_cursor_collection"|js_cursor|collection] => u64
-
// -> the rollup cursor could handle trims.
-
-
// key structure:
-
// * ["by_collection"|collection|js_cursor] => [did|rkey|record]
-
-
// *new* strategy:
-
// 1. collect `collection`s seen during rollup
-
// 2. for each collected collection:
-
// 3. set up prefix iterator
-
// 4. reverse and try to walk back MAX_RETAINED steps
-
// 5. if we didn't end iteration yet, start deleting records (and their forward links) until we get to the end
-
-
// ... we can probably do even better with cursor ranges too, since we'll have a cursor range from rollup and it's in the by_collection key
-
-
Ok(())
-
}
-
-
fn roll_up_counts(_keyspace: Keyspace, _partition: PartitionHandle) -> anyhow::Result<()> {
-
Ok(())
-
}
-
-
pub async fn get_collection_records(
-
&self,
-
collection: &Nsid,
-
limit: usize,
-
) -> anyhow::Result<Vec<CreateRecord>> {
-
let partition = self.db.partition.clone();
-
let prefix = ByCollectionKey::prefix_from_collection(collection.clone())?;
-
tokio::task::spawn_blocking(move || {
-
let mut output = Vec::new();
-
-
for pair in partition.prefix(&prefix).rev().take(limit) {
-
let (k_bytes, v_bytes) = pair?;
-
let (_, cursor) = db_complete::<ByCollectionKey>(&k_bytes)?.into();
-
let (did, rkey, record) = db_complete::<ByCollectionValue>(&v_bytes)?.into();
-
output.push(CreateRecord {
-
did,
-
rkey,
-
record,
-
cursor,
-
})
-
}
-
Ok(output)
-
})
-
.await?
-
}
-
-
pub async fn get_meta_info(&self) -> anyhow::Result<StorageInfo> {
-
let db = &self.db;
-
let keyspace = db.keyspace.clone();
-
let partition = db.partition.clone();
-
tokio::task::spawn_blocking(move || {
-
Ok(StorageInfo {
-
keyspace_disk_space: keyspace.disk_space(),
-
keyspace_journal_count: keyspace.journal_count(),
-
keyspace_sequence: keyspace.instant(),
-
partition_approximate_len: partition.approximate_len(),
-
})
-
})
-
.await?
-
}
-
-
pub async fn get_collection_total_seen(&self, collection: &Nsid) -> anyhow::Result<u64> {
-
let partition = self.db.partition.clone();
-
let collection = collection.clone();
-
tokio::task::spawn_blocking(move || get_unrolled_collection_seen(&partition, collection))
-
.await?
-
}
-
-
pub async fn get_top_collections(&self) -> anyhow::Result<HashMap<String, u64>> {
-
let partition = self.db.partition.clone();
-
tokio::task::spawn_blocking(move || get_unrolled_top_collections(&partition)).await?
-
}
-
-
pub async fn get_jetstream_endpoint(&self) -> anyhow::Result<Option<JetstreamEndpointValue>> {
-
let partition = self.db.partition.clone();
-
tokio::task::spawn_blocking(move || {
-
get_static::<JetstreamEndpointKey, JetstreamEndpointValue>(&partition)
-
})
-
.await?
-
}
-
-
async fn set_jetstream_endpoint(&self, endpoint: &str) -> anyhow::Result<()> {
-
let partition = self.db.partition.clone();
-
let endpoint = endpoint.to_string();
-
tokio::task::spawn_blocking(move || {
-
insert_static::<JetstreamEndpointKey>(&partition, JetstreamEndpointValue(endpoint))
-
})
-
.await?
-
}
-
-
pub async fn get_jetstream_cursor(&self) -> anyhow::Result<Option<Cursor>> {
-
let partition = self.db.partition.clone();
-
tokio::task::spawn_blocking(move || {
-
get_static::<JetstreamCursorKey, JetstreamCursorValue>(&partition)
-
})
-
.await?
-
}
-
-
pub async fn get_mod_cursor(&self) -> anyhow::Result<Option<Cursor>> {
-
let partition = self.db.partition.clone();
-
tokio::task::spawn_blocking(move || get_static::<ModCursorKey, ModCursorValue>(&partition))
-
.await?
-
}
-
}
-
-
/// Get a value from a fixed key
-
fn get_static<K: StaticStr, V: DbBytes>(partition: &PartitionHandle) -> anyhow::Result<Option<V>> {
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
-
let value = partition
-
.get(&key_bytes)?
-
.map(|value_bytes| db_complete(&value_bytes))
-
.transpose()?;
-
Ok(value)
-
}
-
-
/// Set a value to a fixed key
-
fn insert_static<K: StaticStr>(
-
partition: &PartitionHandle,
-
value: impl DbBytes,
-
) -> anyhow::Result<()> {
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
-
let value_bytes = value.to_db_bytes()?;
-
partition.insert(&key_bytes, &value_bytes)?;
-
Ok(())
-
}
-
-
/// Set a value to a fixed key
-
fn insert_batch_static<K: StaticStr>(
-
batch: &mut FjallBatch,
-
partition: &PartitionHandle,
-
value: impl DbBytes,
-
) -> anyhow::Result<()> {
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
-
let value_bytes = value.to_db_bytes()?;
-
batch.insert(partition, &key_bytes, &value_bytes);
-
Ok(())
-
}
-
-
/// Remove a key
-
fn remove_batch<K: DbBytes>(
-
batch: &mut FjallBatch,
-
partition: &PartitionHandle,
-
key: K,
-
) -> Result<(), EncodingError> {
-
let key_bytes = key.to_db_bytes()?;
-
batch.remove(partition, &key_bytes);
-
Ok(())
-
}
-
-
/// Get stats that haven't been rolled up yet
-
fn get_unrolled_collection_seen(
-
partition: &PartitionHandle,
-
collection: Nsid,
-
) -> anyhow::Result<u64> {
-
let range =
-
if let Some(cursor_value) = get_static::<RollupCursorKey, RollupCursorValue>(partition)? {
-
eprintln!("found existing cursor");
-
let key: ByCursorSeenKey = cursor_value.into();
-
key.range_from()?
-
} else {
-
eprintln!("cursor from start.");
-
ByCursorSeenKey::full_range()?
-
};
-
-
let mut collection_total = 0;
-
-
let mut scanned = 0;
-
let mut rolled = 0;
-
-
for pair in partition.range(range) {
-
let (key_bytes, value_bytes) = pair?;
-
let key = db_complete::<ByCursorSeenKey>(&key_bytes)?;
-
let val = db_complete::<ByCursorSeenValue>(&value_bytes)?;
-
-
if *key.collection() == collection {
-
let SeenCounter(n) = val;
-
collection_total += n;
-
rolled += 1;
-
}
-
scanned += 1;
-
}
-
-
eprintln!("scanned: {scanned}, rolled: {rolled}");
-
-
Ok(collection_total)
-
}
-
-
fn get_unrolled_top_collections(
-
partition: &PartitionHandle,
-
) -> anyhow::Result<HashMap<String, u64>> {
-
let range =
-
if let Some(cursor_value) = get_static::<RollupCursorKey, RollupCursorValue>(partition)? {
-
eprintln!("found existing cursor");
-
let key: ByCursorSeenKey = cursor_value.into();
-
key.range_from()?
-
} else {
-
eprintln!("cursor from start.");
-
ByCursorSeenKey::full_range()?
-
};
-
-
let mut res = HashMap::new();
-
let mut scanned = 0;
-
-
for pair in partition.range(range) {
-
let (key_bytes, value_bytes) = pair?;
-
let key = db_complete::<ByCursorSeenKey>(&key_bytes)?;
-
let SeenCounter(n) = db_complete(&value_bytes)?;
-
-
*res.entry(key.collection().to_string()).or_default() += n;
-
-
scanned += 1;
-
}
-
-
eprintln!("scanned: {scanned} seen-counts.");
-
-
Ok(res)
-
}
-
-
impl DBWriter {
-
fn write_batch(self, event_batch: EventBatch, last: Option<Cursor>) -> anyhow::Result<()> {
-
let mut db_batch = self.keyspace.batch();
-
self.add_record_creates(&mut db_batch, event_batch.record_creates)?;
-
self.add_record_modifies(&mut db_batch, event_batch.record_modifies)?;
-
self.add_account_removes(&mut db_batch, event_batch.account_removes)?;
-
if let Some(cursor) = last {
-
insert_batch_static::<JetstreamCursorKey>(&mut db_batch, &self.partition, cursor)?;
-
}
-
log::info!("write: committing write batch...");
-
let r = db_batch.commit();
-
log::info!("write: commit result: {r:?}");
-
r?;
-
Ok(())
-
}
-
-
fn write_rw(
-
self,
-
db_batch: &mut FjallBatch,
-
mod_key: ModQueueItemKey,
-
mod_value: ModQueueItemValue,
-
) -> anyhow::Result<usize> {
-
// update the current rw cursor to this item (atomically with the batch if it succeeds)
-
let mod_cursor: Cursor = (&mod_key).into();
-
insert_batch_static::<ModCursorKey>(db_batch, &self.partition, mod_cursor.clone())?;
-
-
let items_modified = match mod_value {
-
ModQueueItemValue::DeleteAccount(did) => {
-
log::trace!("rw: batcher: delete account...");
-
let (items, finished) = self.delete_account(db_batch, mod_cursor, did)?;
-
log::trace!("rw: batcher: back from delete account (finished? {finished})");
-
if finished {
-
// only remove the queued rw task if we have actually completed its account removal work
-
remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?;
-
items + 1
-
} else {
-
items
-
}
-
}
-
ModQueueItemValue::DeleteRecord(did, collection, rkey) => {
-
log::trace!("rw: batcher: delete record...");
-
let items = self.delete_record(db_batch, mod_cursor, did, collection, rkey)?;
-
log::trace!("rw: batcher: back from delete record");
-
remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?;
-
items + 1
-
}
-
ModQueueItemValue::UpdateRecord(did, collection, rkey, record) => {
-
let items =
-
self.update_record(db_batch, mod_cursor, did, collection, rkey, record)?;
-
remove_batch::<ModQueueItemKey>(db_batch, &self.partition, mod_key)?;
-
items + 1
-
}
-
};
-
Ok(items_modified)
-
}
-
-
fn update_record(
-
&self,
-
db_batch: &mut FjallBatch,
-
cursor: Cursor,
-
did: Did,
-
collection: Nsid,
-
rkey: RecordKey,
-
record: serde_json::Value,
-
) -> anyhow::Result<usize> {
-
// 1. delete any existing versions older than us
-
let items_deleted = self.delete_record(
-
db_batch,
-
cursor.clone(),
-
did.clone(),
-
collection.clone(),
-
rkey.clone(),
-
)?;
-
-
// 2. insert the updated version, at our new cursor
-
self.add_record(db_batch, cursor, did, collection, rkey, record)?;
-
-
let items_total = items_deleted + 1;
-
Ok(items_total)
-
}
-
-
fn delete_record(
-
&self,
-
db_batch: &mut FjallBatch,
-
cursor: Cursor,
-
did: Did,
-
collection: Nsid,
-
rkey: RecordKey,
-
) -> anyhow::Result<usize> {
-
let key_prefix_bytes =
-
ByIdKey::record_prefix(did.clone(), collection.clone(), rkey.clone()).to_db_bytes()?;
-
-
// put the cursor of the actual deletion event in to prevent prefix iter from touching newer docs
-
let key_limit =
-
ByIdKey::new(did, collection.clone(), rkey, cursor.clone()).to_db_bytes()?;
-
-
let mut items_removed = 0;
-
-
log::trace!("delete_record: iterate over up to current cursor...");
-
-
for (i, pair) in self
-
.partition
-
.range(key_prefix_bytes..key_limit)
-
.enumerate()
-
{
-
log::trace!("delete_record iter {i}: found");
-
// find all (hopefully 1)
-
let (key_bytes, _) = pair?;
-
let key = db_complete::<ByIdKey>(&key_bytes)?;
-
let found_cursor = key.cursor();
-
if found_cursor > cursor {
-
// we are *only* allowed to delete records that came before the record delete event
-
// log::trace!("delete_record: found (and ignoring) newer version(s). key: {key:?}");
-
panic!("wtf, found newer version than cursor limit we tried to set.");
-
// break;
-
}
-
-
// remove the by_id entry
-
db_batch.remove(&self.partition, key_bytes);
-
-
// remove its record sample
-
let by_collection_key_bytes =
-
ByCollectionKey::new(collection.clone(), found_cursor).to_db_bytes()?;
-
db_batch.remove(&self.partition, by_collection_key_bytes);
-
-
items_removed += 1;
-
}
-
-
// if items_removed > 1 {
-
// log::trace!("odd, removed {items_removed} records for one record removal:");
-
// for (i, pair) in self.partition.prefix(&key_prefix_bytes).enumerate() {
-
// // find all (hopefully 1)
-
// let (key_bytes, _) = pair?;
-
// let found_cursor = db_complete::<ByIdKey>(&key_bytes)?.cursor();
-
// if found_cursor > cursor {
-
// break;
-
// }
-
-
// let key = db_complete::<ByIdKey>(&key_bytes)?;
-
// log::trace!(" {i}: key {key:?}");
-
// }
-
// }
-
Ok(items_removed)
-
}
-
-
fn delete_account(
-
&self,
-
db_batch: &mut FjallBatch,
-
cursor: Cursor,
-
did: Did,
-
) -> anyhow::Result<(usize, bool)> {
-
let key_prefix_bytes = ByIdKey::did_prefix(did).to_db_bytes()?;
-
-
let mut items_added = 0;
-
-
for pair in self.partition.prefix(&key_prefix_bytes) {
-
let (key_bytes, _) = pair?;
-
-
let (_, collection, _rkey, found_cursor) = db_complete::<ByIdKey>(&key_bytes)?.into();
-
if found_cursor > cursor {
-
log::trace!(
-
"delete account: found (and ignoring) newer records than the delete event??"
-
);
-
continue;
-
}
-
-
// remove the by_id entry
-
db_batch.remove(&self.partition, key_bytes);
-
-
// remove its record sample
-
let by_collection_key_bytes =
-
ByCollectionKey::new(collection, found_cursor).to_db_bytes()?;
-
db_batch.remove(&self.partition, by_collection_key_bytes);
-
-
items_added += 1;
-
if items_added >= MAX_BATCHED_RW_ITEMS {
-
return Ok((items_added, false)); // there might be more records but we've done enough for this batch
-
}
-
}
-
-
Ok((items_added, true))
-
}
-
-
fn add_record_creates(
-
&self,
-
db_batch: &mut FjallBatch,
-
record_creates: HashMap<Nsid, CollectionSamples>,
-
) -> anyhow::Result<()> {
-
for (
-
collection,
-
CollectionSamples {
-
total_seen,
-
samples,
-
},
-
) in record_creates.into_iter()
-
{
-
if let Some(last_record) = &samples.back() {
-
db_batch.insert(
-
&self.partition,
-
ByCursorSeenKey::new(last_record.cursor.clone(), collection.clone())
-
.to_db_bytes()?,
-
ByCursorSeenValue::new(total_seen as u64).to_db_bytes()?,
-
);
-
} else {
-
log::error!(
-
"collection samples should only exist when at least one sample has been added"
-
);
-
}
-
-
for CreateRecord {
-
did,
-
rkey,
-
cursor,
-
record,
-
} in samples.into_iter().rev()
-
{
-
self.add_record(db_batch, cursor, did, collection.clone(), rkey, record)?;
-
}
-
}
-
Ok(())
-
}
-
-
fn add_record(
-
&self,
-
db_batch: &mut FjallBatch,
-
cursor: Cursor,
-
did: Did,
-
collection: Nsid,
-
rkey: RecordKey,
-
record: serde_json::Value,
-
) -> anyhow::Result<()> {
-
// ["by_collection"|collection|js_cursor] => [did|rkey|record]
-
db_batch.insert(
-
&self.partition,
-
ByCollectionKey::new(collection.clone(), cursor.clone()).to_db_bytes()?,
-
ByCollectionValue::new(did.clone(), rkey.clone(), record).to_db_bytes()?,
-
);
-
-
// ["by_id"|did|collection|rkey|js_cursor] => [] // required to support deletes; did first prefix for account deletes.
-
db_batch.insert(
-
&self.partition,
-
ByIdKey::new(did, collection.clone(), rkey, cursor).to_db_bytes()?,
-
ByIdValue::default().to_db_bytes()?,
-
);
-
-
Ok(())
-
}
-
-
fn add_record_modifies(
-
&self,
-
db_batch: &mut FjallBatch,
-
record_modifies: Vec<ModifyRecord>,
-
) -> anyhow::Result<()> {
-
for modification in record_modifies {
-
let (cursor, db_val) = match modification {
-
ModifyRecord::Update(u) => (
-
u.cursor,
-
ModQueueItemValue::UpdateRecord(u.did, u.collection, u.rkey, u.record),
-
),
-
ModifyRecord::Delete(d) => (
-
d.cursor,
-
ModQueueItemValue::DeleteRecord(d.did, d.collection, d.rkey),
-
),
-
};
-
db_batch.insert(
-
&self.partition,
-
ModQueueItemKey::new(cursor).to_db_bytes()?,
-
db_val.to_db_bytes()?,
-
);
-
}
-
Ok(())
-
}
-
-
fn add_account_removes(
-
&self,
-
db_batch: &mut FjallBatch,
-
account_removes: Vec<DeleteAccount>,
-
) -> anyhow::Result<()> {
-
for deletion in account_removes {
-
db_batch.insert(
-
&self.partition,
-
ModQueueItemKey::new(deletion.cursor).to_db_bytes()?,
-
ModQueueItemValue::DeleteAccount(deletion.did).to_db_bytes()?,
-
);
-
}
-
Ok(())
-
}
-
}
-
-
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
-
pub struct StorageInfo {
-
pub keyspace_disk_space: u64,
-
pub keyspace_journal_count: usize,
-
pub keyspace_sequence: u64,
-
pub partition_approximate_len: usize,
-
}
-
-
struct DBWriter {
-
keyspace: Keyspace,
-
partition: PartitionHandle,
-
}
-
-
////////// temp stuff to remove:
-
-
fn summarize_batch(batch: &EventBatch) -> String {
-
let EventBatch {
-
record_creates,
-
record_modifies,
-
account_removes,
-
last_jetstream_cursor,
-
..
-
} = batch;
-
let total_records: usize = record_creates.values().map(|v| v.total_seen).sum();
-
let total_samples: usize = record_creates.values().map(|v| v.samples.len()).sum();
-
format!(
-
"batch of {total_samples: >3} samples from {total_records: >4} records in {: >2} collections, {: >3} modifies, {} acct removes, cursor {: <12?}",
-
record_creates.len(),
-
record_modifies.len(),
-
account_removes.len(),
-
last_jetstream_cursor.clone().map(|c| c.elapsed())
-
)
-
}
+312 -212
ufos/src/store_types.rs
···
use crate::db_types::{
-
DbBytes, DbConcat, DbEmpty, DbStaticStr, EncodingError, StaticStr, UseBincodePlz,
+
DbBytes, DbConcat, DbStaticStr, EncodingError, SerdeBytes, StaticStr, UseBincodePlz,
};
-
use crate::{Cursor, Did, Nsid, RecordKey};
+
use crate::{Cursor, Did, Nsid, PutAction, RecordKey, UFOsCommit};
use bincode::{Decode, Encode};
+
use cardinality_estimator::CardinalityEstimator;
use std::ops::Range;
/// key format: ["js_cursor"]
···
}
pub type JetstreamCursorValue = Cursor;
-
/// key format: ["mod_cursor"]
+
/// key format: ["rollup_cursor"]
#[derive(Debug, PartialEq)]
-
pub struct ModCursorKey {}
-
impl StaticStr for ModCursorKey {
+
pub struct NewRollupCursorKey {}
+
impl StaticStr for NewRollupCursorKey {
fn static_str() -> &'static str {
-
"mod_cursor"
+
"rollup_cursor"
}
}
-
pub type ModCursorValue = Cursor;
+
// pub type NewRollupCursorKey = DbStaticStr<_NewRollupCursorKey>;
+
/// value format: [rollup_cursor(Cursor)]
+
pub type NewRollupCursorValue = Cursor;
-
/// key format: ["rollup_cursor"]
+
/// key format: ["js_endpoint"]
#[derive(Debug, PartialEq)]
-
pub struct RollupCursorKey {}
-
impl StaticStr for RollupCursorKey {
+
pub struct TakeoffKey {}
+
impl StaticStr for TakeoffKey {
fn static_str() -> &'static str {
-
"rollup_cursor"
+
"takeoff"
}
}
-
/// value format: [rollup_cursor(Cursor)|collection(Nsid)]
-
pub type RollupCursorValue = DbConcat<Cursor, Nsid>;
+
pub type TakeoffValue = Cursor;
/// key format: ["js_endpoint"]
#[derive(Debug, PartialEq)]
···
}
}
-
#[derive(Debug, Clone, Encode, Decode)]
-
pub struct SeenCounter(pub u64);
-
impl SeenCounter {
-
pub fn new(n: u64) -> Self {
-
Self(n)
+
pub type NsidRecordFeedKey = DbConcat<Nsid, Cursor>;
+
impl NsidRecordFeedKey {
+
pub fn collection(&self) -> &Nsid {
+
&self.prefix
+
}
+
pub fn cursor(&self) -> Cursor {
+
self.suffix
}
}
-
impl UseBincodePlz for SeenCounter {}
+
pub type NsidRecordFeedVal = DbConcat<Did, DbConcat<RecordKey, String>>;
+
impl NsidRecordFeedVal {
+
pub fn did(&self) -> &Did {
+
&self.prefix
+
}
+
pub fn rkey(&self) -> &RecordKey {
+
&self.suffix.prefix
+
}
+
pub fn rev(&self) -> &str {
+
&self.suffix.suffix
+
}
+
}
+
impl From<(&Did, &RecordKey, &str)> for NsidRecordFeedVal {
+
fn from((did, rkey, rev): (&Did, &RecordKey, &str)) -> Self {
+
Self::from_pair(
+
did.clone(),
+
DbConcat::from_pair(rkey.clone(), rev.to_string()),
+
)
+
}
+
}
-
#[derive(Debug, PartialEq)]
-
pub struct _ByCollectionStaticStr {}
-
impl StaticStr for _ByCollectionStaticStr {
-
fn static_str() -> &'static str {
-
"by_collection"
+
pub type RecordLocationKey = DbConcat<Did, DbConcat<Nsid, RecordKey>>;
+
impl RecordLocationKey {
+
pub fn did(&self) -> &Did {
+
&self.prefix
+
}
+
pub fn collection(&self) -> &Nsid {
+
&self.suffix.prefix
+
}
+
pub fn rkey(&self) -> &RecordKey {
+
&self.suffix.suffix
}
}
-
type ByCollectionPrefix = DbStaticStr<_ByCollectionStaticStr>;
-
/// key format: ["by_collection"|collection|js_cursor]
-
pub type ByCollectionKey = DbConcat<DbConcat<ByCollectionPrefix, Nsid>, Cursor>;
-
impl ByCollectionKey {
-
pub fn new(collection: Nsid, cursor: Cursor) -> Self {
-
Self {
-
prefix: DbConcat::from_pair(Default::default(), collection),
-
suffix: cursor,
-
}
-
}
-
pub fn prefix_from_collection(collection: Nsid) -> Result<Vec<u8>, EncodingError> {
-
DbConcat::from_pair(ByCollectionPrefix::default(), collection).to_db_bytes()
+
impl From<(&UFOsCommit, &Nsid)> for RecordLocationKey {
+
fn from((commit, collection): (&UFOsCommit, &Nsid)) -> Self {
+
Self::from_pair(
+
commit.did.clone(),
+
DbConcat::from_pair(collection.clone(), commit.rkey.clone()),
+
)
}
}
-
impl From<ByCollectionKey> for (Nsid, Cursor) {
-
fn from(k: ByCollectionKey) -> Self {
-
(k.prefix.suffix, k.suffix)
+
impl From<(&NsidRecordFeedKey, &NsidRecordFeedVal)> for RecordLocationKey {
+
fn from((key, val): (&NsidRecordFeedKey, &NsidRecordFeedVal)) -> Self {
+
Self::from_pair(
+
val.did().clone(),
+
DbConcat::from_pair(key.collection().clone(), val.rkey().clone()),
+
)
}
}
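// A minimal sketch (not part of this diff): a feed entry's key and value carry
// enough to rebuild the record's location key via the From impls above. All
// literal values here are illustrative.
fn feed_to_location_sketch() {
    let did = Did::new("did:plc:inze6wrmsm7pjl7yta3oig77".to_string()).unwrap();
    let rkey = RecordKey::new("asdf".to_string()).unwrap();
    let feed_val: NsidRecordFeedVal = (&did, &rkey, "rev-a").into();
    let feed_key = NsidRecordFeedKey::from_pair(
        Nsid::new("a.b.c".to_string()).unwrap(),
        Cursor::from_raw_u64(100),
    );
    let location: RecordLocationKey = (&feed_key, &feed_val).into();
    assert_eq!(location.did(), &did);
    assert_eq!(location.rkey(), &rkey);
}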
#[derive(Debug, PartialEq, Encode, Decode)]
-
pub struct ByCollectionValueInfo {
-
#[bincode(with_serde)]
-
pub did: Did,
-
#[bincode(with_serde)]
-
pub rkey: RecordKey,
+
pub struct RecordLocationMeta {
+
cursor: u64, // ugh no bincode impl
+
pub is_update: bool,
+
pub rev: String,
}
-
impl UseBincodePlz for ByCollectionValueInfo {}
-
/// value format: contains did, rkey, record
-
pub type ByCollectionValue = DbConcat<ByCollectionValueInfo, serde_json::Value>;
-
impl ByCollectionValue {
-
pub fn new(did: Did, rkey: RecordKey, record: serde_json::Value) -> Self {
-
Self {
-
prefix: ByCollectionValueInfo { did, rkey },
-
suffix: record,
-
}
+
impl RecordLocationMeta {
+
pub fn cursor(&self) -> Cursor {
+
Cursor::from_raw_u64(self.cursor)
}
}
-
impl From<ByCollectionValue> for (Did, RecordKey, serde_json::Value) {
-
fn from(v: ByCollectionValue) -> Self {
-
(v.prefix.did, v.prefix.rkey, v.suffix)
+
impl UseBincodePlz for RecordLocationMeta {}
+
+
#[derive(Debug, Clone, PartialEq)]
+
pub struct RecordRawValue(Vec<u8>);
+
impl DbBytes for RecordRawValue {
+
fn to_db_bytes(&self) -> Result<std::vec::Vec<u8>, EncodingError> {
+
self.0.to_db_bytes()
+
}
+
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+
let (v, n) = DbBytes::from_db_bytes(bytes)?;
+
Ok((Self(v), n))
+
}
+
}
+
impl From<Box<serde_json::value::RawValue>> for RecordRawValue {
+
fn from(v: Box<serde_json::value::RawValue>) -> Self {
+
Self(v.get().into())
+
}
+
}
+
impl TryFrom<RecordRawValue> for Box<serde_json::value::RawValue> {
+
type Error = EncodingError;
+
fn try_from(rrv: RecordRawValue) -> Result<Self, Self::Error> {
+
let s = String::from_utf8(rrv.0)?;
+
let rv = serde_json::value::RawValue::from_string(s)?;
+
Ok(rv)
+
}
+
}
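// A minimal round-trip sketch (not part of this diff): raw JSON passes through
// RecordRawValue byte-for-byte, without re-serializing the document.
fn raw_value_roundtrip_sketch() {
    let rv = serde_json::value::RawValue::from_string(r#"{"a":1}"#.to_string()).unwrap();
    let raw: RecordRawValue = rv.into();
    let back: Box<serde_json::value::RawValue> = raw.try_into().unwrap();
    assert_eq!(back.get(), r#"{"a":1}"#);
}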
+
+
pub type RecordLocationVal = DbConcat<RecordLocationMeta, RecordRawValue>;
+
impl From<(Cursor, &str, PutAction)> for RecordLocationVal {
+
fn from((cursor, rev, put): (Cursor, &str, PutAction)) -> Self {
+
let meta = RecordLocationMeta {
+
cursor: cursor.to_raw_u64(),
+
is_update: put.is_update,
+
rev: rev.to_string(),
+
};
+
Self::from_pair(meta, put.record.into())
}
}
#[derive(Debug, PartialEq)]
-
pub struct _ByIdStaticStr {}
-
impl StaticStr for _ByIdStaticStr {
+
pub struct _LiveRecordsStaticStr {}
+
impl StaticStr for _LiveRecordsStaticStr {
fn static_str() -> &'static str {
-
"by_id"
+
"live_counts"
}
}
-
type ByIdStaticPrefix = DbStaticStr<_ByIdStaticStr>;
-
pub type ByIdDidPrefix = DbConcat<ByIdStaticPrefix, Did>;
-
pub type ByIdCollectionPrefix = DbConcat<ByIdDidPrefix, Nsid>;
-
pub type ByIdRecordPrefix = DbConcat<ByIdCollectionPrefix, RecordKey>;
-
/// look up records by user or directly, instead of by collections
-
///
-
/// required to support deletes; did first prefix for account deletes.
-
/// key format: ["by_id"|did|collection|rkey|js_cursor]
-
pub type ByIdKey = DbConcat<ByIdRecordPrefix, Cursor>;
-
impl ByIdKey {
-
pub fn new(did: Did, collection: Nsid, rkey: RecordKey, cursor: Cursor) -> Self {
-
Self::from_pair(Self::record_prefix(did, collection, rkey), cursor)
+
+
type LiveCountsStaticPrefix = DbStaticStr<_LiveRecordsStaticStr>;
+
type LiveCountsCursorPrefix = DbConcat<LiveCountsStaticPrefix, Cursor>;
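+
/// key format: ["live_counts"|js_cursor|collection]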
+
pub type LiveCountsKey = DbConcat<LiveCountsCursorPrefix, Nsid>;
+
impl LiveCountsKey {
+
pub fn range_from_cursor(cursor: Cursor) -> Result<Range<Vec<u8>>, EncodingError> {
+
let prefix = LiveCountsCursorPrefix::from_pair(Default::default(), cursor);
+
prefix.range_to_prefix_end()
}
-
pub fn record_prefix(did: Did, collection: Nsid, rkey: RecordKey) -> ByIdRecordPrefix {
-
ByIdRecordPrefix {
-
prefix: ByIdCollectionPrefix {
-
prefix: Self::did_prefix(did),
-
suffix: collection,
-
},
-
suffix: rkey,
-
}
+
pub fn cursor(&self) -> Cursor {
+
self.prefix.suffix
}
-
pub fn did_prefix(did: Did) -> ByIdDidPrefix {
-
ByIdDidPrefix::from_pair(Default::default(), did)
-
}
-
pub fn cursor(&self) -> Cursor {
-
self.suffix.clone()
+
pub fn collection(&self) -> &Nsid {
+
&self.suffix
}
}
-
impl From<ByIdKey> for (Did, Nsid, RecordKey, Cursor) {
-
fn from(k: ByIdKey) -> Self {
-
(
-
k.prefix.prefix.prefix.suffix,
-
k.prefix.prefix.suffix,
-
k.prefix.suffix,
-
k.suffix,
+
impl From<(Cursor, &Nsid)> for LiveCountsKey {
+
fn from((cursor, collection): (Cursor, &Nsid)) -> Self {
+
Self::from_pair(
+
LiveCountsCursorPrefix::from_pair(Default::default(), cursor),
+
collection.clone(),
)
}
}
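// A minimal sketch (not part of this diff): build a live-count key for one
// (cursor, collection) pair, then derive the byte range a rollup pass would
// scan when resuming from that cursor. Values are illustrative.
fn live_counts_key_sketch() -> Result<(), EncodingError> {
    let nsid = Nsid::new("a.b.c".to_string()).unwrap();
    let key: LiveCountsKey = (Cursor::from_raw_u64(100), &nsid).into();
    assert_eq!(key.cursor().to_raw_u64(), 100);
    assert_eq!(key.collection(), &nsid);
    let _scan_range = LiveCountsKey::range_from_cursor(key.cursor())?;
    Ok(())
}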
+
#[derive(Debug, PartialEq, Decode, Encode)]
+
pub struct TotalRecordsValue(pub u64);
+
impl UseBincodePlz for TotalRecordsValue {}
-
pub type ByIdValue = DbEmpty;
+
#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
+
pub struct EstimatedDidsValue(pub CardinalityEstimator<Did>);
+
impl SerdeBytes for EstimatedDidsValue {}
+
impl DbBytes for EstimatedDidsValue {
+
#[cfg(test)]
+
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+
SerdeBytes::to_bytes(self)
+
}
+
#[cfg(test)]
+
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+
SerdeBytes::from_bytes(bytes)
+
}
-
#[derive(Debug, PartialEq)]
-
pub struct _ByCursorSeenStaticStr {}
-
impl StaticStr for _ByCursorSeenStaticStr {
-
fn static_str() -> &'static str {
-
"seen_by_js_cursor"
+
#[cfg(not(test))]
+
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+
Ok(vec![1, 2, 3]) // TODO: un-stub when their heap overflow is fixed
+
}
+
#[cfg(not(test))]
+
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+
if bytes.len() < 3 {
+
return Err(EncodingError::DecodeNotEnoughBytes);
+
}
+
Ok((Self(CardinalityEstimator::new()), 3)) // TODO: un-stub when their heap overflow is fixed
}
}
-
type ByCursorSeenPrefix = DbStaticStr<_ByCursorSeenStaticStr>;
-
type ByCursorSeenCursorPrefix = DbConcat<ByCursorSeenPrefix, Cursor>;
-
/// key format: ["seen_by_js_cursor"|js_cursor|collection]
-
pub type ByCursorSeenKey = DbConcat<ByCursorSeenCursorPrefix, Nsid>;
-
impl ByCursorSeenKey {
-
pub fn new(cursor: Cursor, nsid: Nsid) -> Self {
+
+
pub type CountsValue = DbConcat<TotalRecordsValue, EstimatedDidsValue>;
+
impl CountsValue {
+
pub fn new(total: u64, dids: CardinalityEstimator<Did>) -> Self {
Self {
-
prefix: DbConcat::from_pair(Default::default(), cursor),
-
suffix: nsid,
+
prefix: TotalRecordsValue(total),
+
suffix: EstimatedDidsValue(dids),
}
}
-
pub fn full_range() -> Result<Range<Vec<u8>>, EncodingError> {
-
let prefix = ByCursorSeenCursorPrefix::from_pair(Default::default(), Cursor::from_start());
-
prefix.range()
+
pub fn records(&self) -> u64 {
+
self.prefix.0
}
-
pub fn range_from(&self) -> Result<Range<Vec<u8>>, EncodingError> {
-
let start = self.to_db_bytes()?;
-
let end = self.prefix.range_end()?;
-
Ok(start..end)
+
pub fn dids(&self) -> &CardinalityEstimator<Did> {
+
&self.suffix.0
}
-
pub fn collection(&self) -> &Nsid {
-
&self.suffix
+
pub fn merge(&mut self, other: &Self) {
+
self.prefix.0 += other.records();
+
self.suffix.0.merge(other.dids());
}
}
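// A minimal sketch (not part of this diff) of merging two rollup buckets:
// record totals add exactly, while distinct-DID counts merge as cardinality
// sketches and remain estimates.
fn counts_merge_sketch() {
    let mut est_a = CardinalityEstimator::new();
    est_a.insert(&Did::new("did:plc:person-a".to_string()).unwrap());
    let mut counts = CountsValue::new(2, est_a);

    let mut est_b = CardinalityEstimator::new();
    est_b.insert(&Did::new("did:plc:person-b".to_string()).unwrap());
    counts.merge(&CountsValue::new(3, est_b));

    assert_eq!(counts.records(), 5);
    // counts.dids() now estimates roughly two distinct DIDs.
}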
-
impl From<RollupCursorValue> for ByCursorSeenKey {
-
fn from(v: RollupCursorValue) -> Self {
-
Self::new(v.prefix, v.suffix)
-
}
-
}
-
impl From<ByCursorSeenKey> for (Cursor, Nsid) {
-
fn from(k: ByCursorSeenKey) -> Self {
-
(k.prefix.suffix, k.suffix)
+
impl Default for CountsValue {
+
fn default() -> Self {
+
Self {
+
prefix: TotalRecordsValue(0),
+
suffix: EstimatedDidsValue(CardinalityEstimator::new()),
+
}
}
}
-
-
pub type ByCursorSeenValue = SeenCounter;
#[derive(Debug, PartialEq)]
-
pub struct _ModQueueItemStaticStr {}
-
impl StaticStr for _ModQueueItemStaticStr {
+
pub struct _DeleteAccountStaticStr {}
+
impl StaticStr for _DeleteAccountStaticStr {
fn static_str() -> &'static str {
-
"mod_queue"
+
"delete_acount"
}
}
-
pub type ModQueueItemPrefix = DbStaticStr<_ModQueueItemStaticStr>;
-
/// key format: ["mod_queue"|js_cursor]
-
pub type ModQueueItemKey = DbConcat<ModQueueItemPrefix, Cursor>;
-
impl ModQueueItemKey {
+
pub type DeleteAccountStaticPrefix = DbStaticStr<_DeleteAccountStaticStr>;
+
pub type DeleteAccountQueueKey = DbConcat<DeleteAccountStaticPrefix, Cursor>;
+
impl DeleteAccountQueueKey {
pub fn new(cursor: Cursor) -> Self {
Self::from_pair(Default::default(), cursor)
}
}
-
// todo: remove this? all we need is the ModCursorValue version?
-
impl From<ModQueueItemKey> for Cursor {
-
fn from(k: ModQueueItemKey) -> Self {
-
k.suffix
+
pub type DeleteAccountQueueVal = Did;
+
+
#[derive(Debug, PartialEq)]
+
pub struct _HourlyRollupStaticStr {}
+
impl StaticStr for _HourlyRollupStaticStr {
+
fn static_str() -> &'static str {
+
"hourly_counts"
}
}
-
impl From<&ModQueueItemKey> for ModCursorValue {
-
fn from(k: &ModQueueItemKey) -> Self {
-
k.suffix.clone()
+
pub type HourlyRollupStaticPrefix = DbStaticStr<_HourlyRollupStaticStr>;
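+
/// key format: ["hourly_counts"|hour_truncated_cursor|collection]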
+
pub type HourlyRollupKey = DbConcat<DbConcat<HourlyRollupStaticPrefix, HourTruncatedCursor>, Nsid>;
+
impl HourlyRollupKey {
+
pub fn new(hourly_cursor: HourTruncatedCursor, nsid: &Nsid) -> Self {
+
Self::from_pair(
+
DbConcat::from_pair(Default::default(), hourly_cursor),
+
nsid.clone(),
+
)
}
}
+
pub type HourlyRollupVal = CountsValue;
-
#[derive(Debug, Encode, Decode)]
-
pub enum ModQueueItemStringValue {
-
DeleteAccount(String), // did
-
DeleteRecord(String, String, String), // did, collection, rkey
-
UpdateRecord(String, String, String, String), // did, collection, rkey, json record
+
#[derive(Debug, PartialEq)]
+
pub struct _WeeklyRollupStaticStr {}
+
impl StaticStr for _WeeklyRollupStaticStr {
+
fn static_str() -> &'static str {
+
"weekly_counts"
+
}
}
-
impl UseBincodePlz for ModQueueItemStringValue {}
-
#[derive(Debug, Clone, PartialEq)]
-
pub enum ModQueueItemValue {
-
DeleteAccount(Did),
-
DeleteRecord(Did, Nsid, RecordKey),
-
UpdateRecord(Did, Nsid, RecordKey, serde_json::Value),
+
pub type WeeklyRollupStaticPrefix = DbStaticStr<_WeeklyRollupStaticStr>;
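+
/// key format: ["weekly_counts"|week_truncated_cursor|collection]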
+
pub type WeeklyRollupKey = DbConcat<DbConcat<WeeklyRollupStaticPrefix, WeekTruncatedCursor>, Nsid>;
+
impl WeeklyRollupKey {
+
pub fn new(weekly_cursor: WeekTruncatedCursor, nsid: &Nsid) -> Self {
+
Self::from_pair(
+
DbConcat::from_pair(Default::default(), weekly_cursor),
+
nsid.clone(),
+
)
+
}
}
-
impl From<ModQueueItemValue> for ModQueueItemStringValue {
-
fn from(v: ModQueueItemValue) -> Self {
-
match v {
-
ModQueueItemValue::DeleteAccount(did) => {
-
ModQueueItemStringValue::DeleteAccount(did.to_string())
-
}
-
ModQueueItemValue::DeleteRecord(did, collection, rkey) => {
-
ModQueueItemStringValue::DeleteRecord(
-
did.to_string(),
-
collection.to_string(),
-
rkey.to_string(),
-
)
-
}
-
ModQueueItemValue::UpdateRecord(did, collection, rkey, record) => {
-
ModQueueItemStringValue::UpdateRecord(
-
did.to_string(),
-
collection.to_string(),
-
rkey.to_string(),
-
record.to_string(),
-
)
-
}
-
}
+
pub type WeeklyRollupVal = CountsValue;
+
+
#[derive(Debug, PartialEq)]
+
pub struct _AllTimeRollupStaticStr {}
+
impl StaticStr for _AllTimeRollupStaticStr {
+
fn static_str() -> &'static str {
+
"ever_counts"
}
}
-
impl TryFrom<ModQueueItemStringValue> for ModQueueItemValue {
-
type Error = EncodingError;
-
fn try_from(v: ModQueueItemStringValue) -> Result<Self, Self::Error> {
-
match v {
-
ModQueueItemStringValue::DeleteAccount(did) => Ok(ModQueueItemValue::DeleteAccount(
-
Did::new(did).map_err(EncodingError::BadAtriumStringType)?,
-
)),
-
ModQueueItemStringValue::DeleteRecord(did, collection, rkey) => {
-
Ok(ModQueueItemValue::DeleteRecord(
-
Did::new(did).map_err(EncodingError::BadAtriumStringType)?,
-
Nsid::new(collection).map_err(EncodingError::BadAtriumStringType)?,
-
RecordKey::new(rkey).map_err(EncodingError::BadAtriumStringType)?,
-
))
-
}
-
ModQueueItemStringValue::UpdateRecord(did, collection, rkey, record) => {
-
Ok(ModQueueItemValue::UpdateRecord(
-
Did::new(did).map_err(EncodingError::BadAtriumStringType)?,
-
Nsid::new(collection).map_err(EncodingError::BadAtriumStringType)?,
-
RecordKey::new(rkey).map_err(EncodingError::BadAtriumStringType)?,
-
record.parse()?,
-
))
-
}
+
pub type AllTimeRollupStaticPrefix = DbStaticStr<_AllTimeRollupStaticStr>;
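+
/// key format: ["ever_counts"|collection]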
+
pub type AllTimeRollupKey = DbConcat<AllTimeRollupStaticPrefix, Nsid>;
+
impl AllTimeRollupKey {
+
pub fn new(nsid: &Nsid) -> Self {
+
Self::from_pair(Default::default(), nsid.clone())
+
}
+
pub fn collection(&self) -> &Nsid {
+
&self.suffix
+
}
+
}
+
pub type AllTimeRollupVal = CountsValue;
+
+
#[derive(Debug, Copy, Clone, PartialEq, Hash, PartialOrd, Eq)]
+
pub struct TruncatedCursor<const MOD: u64>(u64);
+
impl<const MOD: u64> TruncatedCursor<MOD> {
+
fn truncate(raw: u64) -> u64 {
+
(raw / MOD) * MOD
+
}
+
pub fn try_from_raw_u64(time_us: u64) -> Result<Self, EncodingError> {
+
let rem = time_us % MOD;
+
if rem != 0 {
+
return Err(EncodingError::InvalidTruncated(MOD, rem));
}
+
Ok(Self(time_us))
+
}
+
pub fn try_from_cursor(cursor: Cursor) -> Result<Self, EncodingError> {
+
Self::try_from_raw_u64(cursor.to_raw_u64())
+
}
+
pub fn truncate_cursor(cursor: Cursor) -> Self {
+
let raw = cursor.to_raw_u64();
+
let truncated = Self::truncate(raw);
+
Self(truncated)
}
}
-
impl DbBytes for ModQueueItemValue {
-
fn to_db_bytes(&self) -> Result<std::vec::Vec<u8>, EncodingError> {
-
Into::<ModQueueItemStringValue>::into(self.clone()).to_db_bytes()
+
impl<const MOD: u64> From<TruncatedCursor<MOD>> for Cursor {
+
fn from(truncated: TruncatedCursor<MOD>) -> Self {
+
Cursor::from_raw_u64(truncated.0)
+
}
+
}
+
impl<const MOD: u64> From<Cursor> for TruncatedCursor<MOD> {
+
fn from(cursor: Cursor) -> Self {
+
Self::truncate_cursor(cursor)
+
}
+
}
+
impl<const MOD: u64> DbBytes for TruncatedCursor<MOD> {
+
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+
let as_cursor: Cursor = (*self).into();
+
as_cursor.to_db_bytes()
}
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
-
let (stringy, n) = ModQueueItemStringValue::from_db_bytes(bytes)?;
-
let me = TryInto::<ModQueueItemValue>::try_into(stringy)?;
+
let (cursor, n) = Cursor::from_db_bytes(bytes)?;
+
let me = Self::try_from_cursor(cursor)?;
Ok((me, n))
}
}
+
const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
+
pub type HourTruncatedCursor = TruncatedCursor<HOUR_IN_MICROS>;
+
+
const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
+
pub type WeekTruncatedCursor = TruncatedCursor<WEEK_IN_MICROS>;
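For orientation, the concrete bucket sizes, plus one worked truncation (a sketch; the timestamp is the same one the tests below use):

//   HOUR_IN_MICROS = 1_000_000 * 3600        = 3_600_000_000
//   WEEK_IN_MICROS = HOUR_IN_MICROS * 24 * 7 = 604_800_000_000
let c = Cursor::from_raw_u64(1_743_778_483_483_895);
let h: Cursor = HourTruncatedCursor::truncate_cursor(c).into(); // 1_743_775_200_000_000
let w: Cursor = WeekTruncatedCursor::truncate_cursor(c).into(); // 1_743_638_400_000_000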
+
#[cfg(test)]
mod test {
-
use super::{ByCollectionKey, ByCollectionValue, Cursor, Did, EncodingError, Nsid, RecordKey};
+
use super::{
+
CardinalityEstimator, CountsValue, Cursor, Did, EncodingError, HourTruncatedCursor,
+
HourlyRollupKey, Nsid, HOUR_IN_MICROS,
+
};
use crate::db_types::DbBytes;
#[test]
-
fn test_by_collection_key() -> Result<(), EncodingError> {
+
fn test_by_hourly_rollup_key() -> Result<(), EncodingError> {
let nsid = Nsid::new("ab.cd.efg".to_string()).unwrap();
-
let original = ByCollectionKey::new(nsid.clone(), Cursor::from_raw_u64(456));
+
let original = HourlyRollupKey::new(Cursor::from_raw_u64(4567890).into(), &nsid);
let serialized = original.to_db_bytes()?;
-
let (restored, bytes_consumed) = ByCollectionKey::from_db_bytes(&serialized)?;
+
let (restored, bytes_consumed) = HourlyRollupKey::from_db_bytes(&serialized)?;
assert_eq!(restored, original);
assert_eq!(bytes_consumed, serialized.len());
let serialized_prefix = original.to_prefix_db_bytes()?;
-
assert!(serialized.starts_with(&serialized_prefix));
-
let just_prefix = ByCollectionKey::prefix_from_collection(nsid)?;
-
assert_eq!(just_prefix, serialized_prefix);
-
assert!(just_prefix.starts_with("by_collection".as_bytes()));
+
assert!(serialized_prefix.starts_with("hourly_counts".as_bytes()));
+
assert!(serialized.starts_with(&serialized_prefix));
Ok(())
}
#[test]
-
fn test_by_collection_value() -> Result<(), EncodingError> {
-
let did = Did::new("did:plc:inze6wrmsm7pjl7yta3oig77".to_string()).unwrap();
-
let rkey = RecordKey::new("asdfasdf".to_string()).unwrap();
-
let record = serde_json::Value::String("hellooooo".into());
+
fn test_by_hourly_rollup_value() -> Result<(), EncodingError> {
+
let mut estimator = CardinalityEstimator::new();
+
for i in 0..10 {
+
estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig7{i}")).unwrap());
+
}
+
let original = CountsValue::new(123, estimator.clone());
+
let serialized = original.to_db_bytes()?;
+
let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
+
assert_eq!(restored, original);
+
assert_eq!(bytes_consumed, serialized.len());
-
let original = ByCollectionValue::new(did, rkey, record);
+
for i in 10..1_000 {
+
estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig{i}")).unwrap());
+
}
+
let original = CountsValue::new(123, estimator);
let serialized = original.to_db_bytes()?;
-
let (restored, bytes_consumed) = ByCollectionValue::from_db_bytes(&serialized)?;
+
let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
assert_eq!(restored, original);
assert_eq!(bytes_consumed, serialized.len());
Ok(())
+
}
+
+
#[test]
+
fn test_hour_truncated_cursor() {
+
let us = Cursor::from_raw_u64(1_743_778_483_483_895);
+
let hr = HourTruncatedCursor::truncate_cursor(us);
+
let back: Cursor = hr.into();
+
assert!(back < us);
+
let diff = us.to_raw_u64() - back.to_raw_u64();
+
assert!(diff < HOUR_IN_MICROS);
+
}
+
+
#[test]
+
fn test_hour_truncated_cursor_already_truncated() {
+
let us = Cursor::from_raw_u64(1_743_775_200_000_000);
+
let hr = HourTruncatedCursor::truncate_cursor(us);
+
let back: Cursor = hr.into();
+
assert_eq!(back, us);
+
let diff = us.to_raw_u64() - back.to_raw_u64();
+
assert_eq!(diff, 0);
}
}