A better Rust ATProto crate

ipld value type to atproto data model first pass

Orual 9ced24d4 47857bff

Changed files
+493 -4
crates
jacquard-common
+101
Cargo.lock
···
]
[[package]]
name = "half"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
"multibase",
"multihash",
"ouroboros",
"regex",
"serde",
"serde_html_form",
···
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
]
[[package]]
name = "range-traits"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasm-bindgen"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
name = "writeable"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
"quote",
"syn 2.0.106",
"synstructure",
]
[[package]]
···
]
[[package]]
+
name = "getrandom"
+
version = "0.3.3"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+
dependencies = [
+
"cfg-if",
+
"libc",
+
"r-efi",
+
"wasi",
+
]
+
+
[[package]]
name = "half"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
"multibase",
"multihash",
"ouroboros",
+
"rand",
"regex",
"serde",
"serde_html_form",
···
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
+
name = "ppv-lite86"
+
version = "0.2.21"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+
dependencies = [
+
"zerocopy",
+
]
+
+
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
]
[[package]]
+
name = "r-efi"
+
version = "5.3.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+
[[package]]
+
name = "rand"
+
version = "0.9.2"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+
dependencies = [
+
"rand_chacha",
+
"rand_core",
+
]
+
+
[[package]]
+
name = "rand_chacha"
+
version = "0.9.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+
dependencies = [
+
"ppv-lite86",
+
"rand_core",
+
]
+
+
[[package]]
+
name = "rand_core"
+
version = "0.9.3"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+
dependencies = [
+
"getrandom",
+
]
+
+
[[package]]
name = "range-traits"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
+
name = "wasi"
+
version = "0.14.7+wasi-0.2.4"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
+
dependencies = [
+
"wasip2",
+
]
+
+
[[package]]
+
name = "wasip2"
+
version = "1.0.1+wasi-0.2.4"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+
dependencies = [
+
"wit-bindgen",
+
]
+
+
[[package]]
name = "wasm-bindgen"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
+
name = "wit-bindgen"
+
version = "0.46.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+
+
[[package]]
name = "writeable"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
"quote",
"syn 2.0.106",
"synstructure",
+
]
+
+
[[package]]
+
name = "zerocopy"
+
version = "0.8.27"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
+
dependencies = [
+
"zerocopy-derive",
+
]
+
+
[[package]]
+
name = "zerocopy-derive"
+
version = "0.8.27"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
+
dependencies = [
+
"proc-macro2",
+
"quote",
+
"syn 2.0.106",
]
[[package]]
+1
crates/jacquard-common/Cargo.toml
···
multibase = "0.9.1"
multihash = "0.19.3"
ouroboros = "0.18.5"
regex = "1.11.3"
serde = { version = "1.0.227", features = ["derive"] }
serde_html_form = "0.2.8"
···
multibase = "0.9.1"
multihash = "0.19.3"
ouroboros = "0.18.5"
+
rand = "0.9.2"
regex = "1.11.3"
serde = { version = "1.0.227", features = ["derive"] }
serde_html_form = "0.2.8"
+103 -3
crates/jacquard-common/src/types/tid.rs
···
use smol_str::{SmolStr, SmolStrBuilder};
use std::fmt;
use std::sync::LazyLock;
use std::{ops::Deref, str::FromStr};
use crate::CowStr;
···
use crate::types::string::{AtStrError, StrParseKind};
use regex::Regex;
fn s32_encode(mut i: u64) -> SmolStr {
-
const S32_CHAR: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";
-
let mut s = SmolStrBuilder::new();
for _ in 0..13 {
let c = i & 0x1F;
-
s.push(S32_CHAR[c as usize] as char);
i >>= 5;
}
···
Self(s32_encode(tid))
}
/// Construct a new [Tid] that represents the current time.
///
/// If you have multiple clock sources, you can use `clkid` to distinguish between them
···
}
}
impl FromStr for Tid {
type Err = AtStrError;
···
self.as_str()
}
}
···
use smol_str::{SmolStr, SmolStrBuilder};
use std::fmt;
use std::sync::LazyLock;
+
use std::time::SystemTime;
use std::{ops::Deref, str::FromStr};
use crate::CowStr;
···
use crate::types::string::{AtStrError, StrParseKind};
use regex::Regex;
+
const S32_CHAR: &str = "234567abcdefghijklmnopqrstuvwxyz";
+
fn s32_encode(mut i: u64) -> SmolStr {
let mut s = SmolStrBuilder::new();
for _ in 0..13 {
let c = i & 0x1F;
+
s.push(S32_CHAR.chars().nth(c as usize).unwrap());
i >>= 5;
}
···
Self(s32_encode(tid))
}
+
pub fn from_time(timestamp: usize, clkid: u32) -> Self {
+
let str = smol_str::format_smolstr!(
+
"{0}{1:2>2}",
+
s32_encode(timestamp as u64),
+
s32_encode(Into::<u32>::into(clkid) as u64)
+
);
+
Self(str)
+
}
+
+
pub fn timestamp(&self) -> usize {
+
s32decode(self.0[0..11].to_owned())
+
}
+
+
// newer > older
+
pub fn compare_to(&self, other: &Tid) -> i8 {
+
if self.0 > other.0 {
+
return 1;
+
}
+
if self.0 < other.0 {
+
return -1;
+
}
+
0
+
}
+
+
/// Returns `true` when `self` compares strictly greater than `other`.
pub fn newer_than(&self, other: &Tid) -> bool {
    let ordering = self.compare_to(other);
    ordering.is_positive()
}
+
+
/// Returns `true` when `self` compares strictly less than `other`.
pub fn older_than(&self, other: &Tid) -> bool {
    let ordering = self.compare_to(other);
    ordering.is_negative()
}
+
+
pub fn next_str(prev: Option<Tid>) -> Result<Self, AtStrError> {
+
let prev = match prev {
+
None => None,
+
Some(prev) => Some(Tid::new(prev)?),
+
};
+
Ok(Ticker::new().next(prev))
+
}
+
/// Construct a new [Tid] that represents the current time.
///
/// If you have multiple clock sources, you can use `clkid` to distinguish between them
···
}
}
+
/// Decode a base32-sortable string into an integer.
///
/// Characters are read left to right as base-32 digits over the alphabet
/// `234567abcdefghijklmnopqrstuvwxyz` (`'2'` is 0, `'7'` is 5, `'a'` is 6,
/// `'z'` is 31). An empty string decodes to `0`.
///
/// Each digit is computed directly from the character's range rather than by
/// scanning the alphabet.
///
/// # Panics
///
/// Panics if `s` contains a character outside the alphabet.
pub fn s32decode(s: String) -> usize {
    s.chars().fold(0usize, |acc, c| {
        let digit = match c {
            '2'..='7' => c as usize - '2' as usize,
            'a'..='z' => c as usize - 'a' as usize + 6,
            _ => panic!("invalid base32-sortable character: {c:?}"),
        };
        acc * 32 + digit
    })
}
+
impl FromStr for Tid {
type Err = AtStrError;
···
self.as_str()
}
}
+
+
/// Stateful generator of [`Tid`] values.
///
/// Based on adenosine/adenosine/src/identifiers.rs
// TODO: clean up and normalize stuff between this and the stuff pulled from atrium
#[derive(Debug)]
pub struct Ticker {
    /// Most recent timestamp value chosen by `next`, in microseconds since
    /// the UNIX epoch.
    last_timestamp: usize,
    /// Clock identifier; `new` fills it with a random value masked to 10 bits.
    clock_id: u32,
}
+
+
impl Ticker {
+
pub fn new() -> Self {
+
let mut ticker = Self {
+
last_timestamp: 0,
+
// mask to 10 bits
+
clock_id: rand::random::<u32>() & 0x03FF,
+
};
+
// prime the pump
+
ticker.next(None);
+
ticker
+
}
+
+
pub fn next(&mut self, prev: Option<Tid>) -> Tid {
+
let now = SystemTime::now()
+
.duration_since(SystemTime::UNIX_EPOCH)
+
.expect("timestamp in micros since UNIX epoch")
+
.as_micros() as usize;
+
// mask to 53 bits
+
let now = now & 0x001FFFFFFFFFFFFF;
+
if now > self.last_timestamp {
+
self.last_timestamp = now;
+
} else {
+
self.last_timestamp += 1;
+
}
+
// 53 bits of millis
+
let micros = self.last_timestamp & 0x001FFFFFFFFFFFFF;
+
// 10 bits of clock ID
+
let clock_id = self.clock_id & 0x03FF;
+
+
let tid = Tid::from_time(micros, clock_id as u32);
+
match prev {
+
Some(ref prev) if tid.newer_than(prev) => tid,
+
Some(prev) => Tid::from_time(prev.timestamp() + 1, clock_id as u32),
+
None => tid,
+
}
+
}
+
}
+
+
impl Default for Ticker {
+
fn default() -> Self {
+
Self::new()
+
}
+
}
+288 -1
crates/jacquard-common/src/types/value.rs
···
prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD},
};
use bytes::Bytes;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use smol_str::{SmolStr, ToSmolStr};
use std::{collections::BTreeMap, str::FromStr};
···
Self::Null
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
···
let mut array = Vec::with_capacity(json.len());
for item in json {
array.push(Data::from_json(item));
}
Self(array)
}
···
Data::Object(Object(map))
}
-
//pub fn from_cbor(cbor: BTreeMap<String, ipld_core::ipld::Ipld>) -> Self {}
}
/// smarter parsing to avoid trying as many possibilities.
···
// or Ipld value is at least a string, and then we fall back to Object/Map.
_ => DataModelType::String(LexiconStringType::String),
}
}
pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> {
···
prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD},
};
use bytes::Bytes;
+
use ipld_core::ipld::Ipld;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use smol_str::{SmolStr, ToSmolStr};
use std::{collections::BTreeMap, str::FromStr};
···
Self::Null
}
}
+
+
pub fn from_cbor(cbor: &'s Ipld) -> Self {
+
match cbor {
+
Ipld::Null => Data::Null,
+
Ipld::Bool(bool) => Data::Boolean(*bool),
+
Ipld::Integer(int) => Data::Integer(*int as i64),
+
Ipld::Float(_) => todo!(),
+
Ipld::String(string) => Self::String(AtprotoStr::new(string)),
+
Ipld::Bytes(items) => Self::Bytes(Bytes::copy_from_slice(items.as_slice())),
+
Ipld::List(iplds) => Self::Array(Array::from_cbor(iplds)),
+
Ipld::Map(btree_map) => Object::from_cbor(btree_map),
+
Ipld::Link(cid) => Self::CidLink(Cid::ipld(*cid)),
+
}
+
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
···
let mut array = Vec::with_capacity(json.len());
for item in json {
array.push(Data::from_json(item));
+
}
+
Self(array)
+
}
+
pub fn from_cbor(cbor: &'s Vec<Ipld>) -> Self {
+
let mut array = Vec::with_capacity(cbor.len());
+
for item in cbor {
+
array.push(Data::from_cbor(item));
}
Self(array)
}
···
Data::Object(Object(map))
}
+
pub fn from_cbor(cbor: &'s BTreeMap<String, Ipld>) -> Data<'s> {
+
if let Some(Ipld::String(type_field)) = cbor.get("$type") {
+
if infer_from_type(type_field) == DataModelType::Blob {
+
if let Some(blob) = cbor_to_blob(cbor) {
+
return Data::Blob(blob);
+
}
+
}
+
}
+
let mut map = BTreeMap::new();
+
+
for (key, value) in cbor {
+
if key == "$type" {
+
continue; // skip, because we've already handled it
+
}
+
match string_key_type_guess(key) {
+
DataModelType::Null => {
+
if *value == Ipld::Null {
+
map.insert(key.to_smolstr(), Data::Null);
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Boolean => {
+
if let Ipld::Bool(value) = value {
+
map.insert(key.to_smolstr(), Data::Boolean(*value));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Integer => {
+
if let Ipld::Integer(int) = value {
+
map.insert(key.to_smolstr(), Data::Integer(*int as i64));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Bytes => {
+
if let Ipld::Bytes(value) = value {
+
map.insert(key.to_smolstr(), Data::Bytes(Bytes::copy_from_slice(value)));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Blob => {
+
if let Ipld::Map(value) = value {
+
map.insert(key.to_smolstr(), Object::from_cbor(value));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Array => {
+
if let Ipld::List(value) = value {
+
map.insert(key.to_smolstr(), Data::Array(Array::from_cbor(value)));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::Object => {
+
if let Ipld::Map(value) = value {
+
map.insert(key.to_smolstr(), Object::from_cbor(value));
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
DataModelType::String(string_type) => {
+
if let Ipld::String(value) = value {
+
match string_type {
+
LexiconStringType::Datetime => {
+
if let Ok(datetime) = Datetime::from_str(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Datetime(datetime)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::AtUri => {
+
if let Ok(value) = AtUri::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::AtUri(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Did => {
+
if let Ok(value) = Did::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Did(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Handle => {
+
if let Ok(value) = Handle::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Handle(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::AtIdentifier => {
+
if let Ok(value) = AtIdentifier::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::AtIdentifier(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Nsid => {
+
if let Ok(value) = Nsid::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Nsid(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Cid => {
+
if let Ok(value) = Cid::new(value.as_bytes()) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Cid(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Language => {
+
if let Ok(value) = Language::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Language(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Tid => {
+
if let Ok(value) = Tid::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Tid(value)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::RecordKey => {
+
if let Ok(value) = Rkey::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::RecordKey(RecordKey::from(value))),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::Uri(_) => {
+
if let Ok(uri) = Uri::new(value) {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::Uri(uri)),
+
);
+
} else {
+
map.insert(
+
key.to_smolstr(),
+
Data::String(AtprotoStr::String(value.into())),
+
);
+
}
+
}
+
LexiconStringType::String => {
+
map.insert(key.to_smolstr(), Data::String(parse_string(value)));
+
}
+
}
+
} else {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
_ => {
+
map.insert(key.to_smolstr(), Data::from_cbor(value));
+
}
+
}
+
}
+
+
Data::Object(Object(map))
+
}
}
/// smarter parsing to avoid trying as many possibilities.
···
// or Ipld value is at least a string, and then we fall back to Object/Map.
_ => DataModelType::String(LexiconStringType::String),
}
+
}
+
+
pub fn cbor_to_blob<'b>(blob: &'b BTreeMap<String, Ipld>) -> Option<Blob<'b>> {
+
let mime_type = blob.get("mimeType").and_then(|o| {
+
if let Ipld::String(string) = o {
+
Some(string)
+
} else {
+
None
+
}
+
});
+
if let Some(value) = blob.get("ref") {
+
if let Ipld::Map(value) = value {
+
if let Some(Ipld::String(value)) = value.get("$link") {
+
let size = blob.get("size").and_then(|o| {
+
if let Ipld::Integer(i) = o {
+
Some(*i as i64)
+
} else {
+
None
+
}
+
});
+
if let (Some(mime_type), Some(size)) = (mime_type, size) {
+
return Some(Blob {
+
r#ref: Cid::str(value),
+
mime_type: MimeType::raw(mime_type),
+
size: size as usize,
+
});
+
}
+
}
+
}
+
} else if let Some(Ipld::String(value)) = blob.get("cid") {
+
if let Some(mime_type) = mime_type {
+
return Some(Blob {
+
r#ref: Cid::str(value),
+
mime_type: MimeType::raw(mime_type),
+
size: 0,
+
});
+
}
+
}
+
+
None
}
pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> {