A better Rust ATProto crate

at-uri implementation; reworked string types a bit to use SmolStr when useful; collection and literal traits; rkey types and traits

Orual c106f152 17b4b461

+23 -27
Cargo.lock
···
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
[[package]]
-
name = "bumpalo"
-
version = "3.19.0"
+
name = "borsh"
+
version = "1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+
checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce"
+
dependencies = [
+
"cfg_aliases",
+
]
[[package]]
-
name = "castaway"
-
version = "0.2.4"
+
name = "bumpalo"
+
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
-
dependencies = [
-
"rustversion",
-
]
+
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "cc"
···
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
+
+
[[package]]
+
name = "cfg_aliases"
+
version = "0.2.1"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
···
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
-
name = "compact_str"
-
version = "0.9.0"
-
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
-
dependencies = [
-
"castaway",
-
"cfg-if",
-
"itoa",
-
"rustversion",
-
"ryu",
-
"static_assertions",
-
]
-
-
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
dependencies = [
"chrono",
"cid",
-
"compact_str",
"miette",
"multibase",
"multihash",
···
"serde",
"serde_html_form",
"serde_json",
+
"smol_str",
"thiserror",
]
···
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
-
name = "static_assertions"
-
version = "1.1.0"
+
name = "smol_str"
+
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
checksum = "9676b89cd56310a87b93dec47b11af744f34d5fc9f367b829474eec0a891350d"
+
dependencies = [
+
"borsh",
+
"serde",
+
]
[[package]]
name = "strsim"
+7
Cargo.toml
···
edition = "2024"
version = "0.1.0"
authors = ["Orual <orual@nonbinary.computer>"]
+
repository = "https://tangled.org/@nonbinary.computer/jacquard"
+
keywords = ["atproto", "at protocol", "bluesky", "api", "client"]
+
categories = ["api-bindings", "web-programming::http-client"]
+
readme = "README.md"
+
documentation = "https://docs.rs/jacquard"
+
exclude = [".direnv"]
+
description = "A simple Rust project using Nix"
+26
README.md
···
```
There's also a [`justfile`](https://just.systems/) for Makefile-esque commands to be run inside of the devShell, and you can generally `cargo ...` or `just ...` whatever just fine if you don't want to use Nix and have the prerequisites installed.
+
+
+
+
### String types
+
Something of a note to self. Developing a pattern with the string types (may macro-ify at some point). Each needs:
+
- new(): constructs from a string slice, borrowing from the input for its lifetime
+
- new_owned(): constructing from an `impl AsRef<str>`, taking ownership
+
- new_static(): construction from a &'static str, using SmolStr's/CowStr's new_static() constructor to not allocate
+
- raw(): same as new() but panics instead of erroring
+
- unchecked(): same as new() but doesn't validate. marked unsafe.
+
- as_str(): does what it says on the tin
+
#### Traits:
+
- Serialize + Deserialize (custom impl for latter, sometimes for former)
+
- FromStr
+
- Display
+
- Debug, PartialEq, Eq, Hash, Clone
+
- `From<T>` for String, CowStr, SmolStr
+
- `From<String>`, `From<CowStr>`, `From<SmolStr>`, or `TryFrom` if conversion is likely enough to fail in practice that panics would be common
+
- `AsRef<str>`
+
- Deref with Target = str (usually)
+
+
Use `#[repr(transparent)]` as much as possible. Main exception is at-uri type and components.
+
Use SmolStr directly as the inner type if most or all of the instances will be under 24 bytes; this saves lifetime headaches.
+
Use CowStr for longer strings, to allow borrowing from the input.
+
+
TODO: impl IntoStatic trait to take ownership of string types
+1 -1
crates/jacquard-common/Cargo.toml
···
[dependencies]
chrono = "0.4.42"
cid = { version = "0.11.1", features = ["serde", "std"] }
-
compact_str = "0.9.0"
miette = "7.6.0"
multibase = "0.9.1"
multihash = "0.19.3"
···
serde = { version = "1.0.227", features = ["derive"] }
serde_html_form = "0.2.8"
serde_json = "1.0.145"
+
smol_str = { version = "0.3.2", features = ["serde"] }
thiserror = "2.0.16"
+13 -9
crates/jacquard-common/src/cowstr.rs
···
-
use compact_str::CompactString;
use serde::{Deserialize, Serialize};
+
use smol_str::SmolStr;
use std::{
borrow::Cow,
fmt,
···
use crate::IntoStatic;
/// Shamelessly copied from https://github.com/bearcove/merde
-
/// A copy-on-write string type that uses [`CompactString`] for
+
/// A copy-on-write immutable string type that uses [`SmolStr`] for
/// the "owned" variant.
///
/// The standard [`Cow`] type cannot be used, since
-
/// `<str as ToOwned>::Owned` is `String`, and not `CompactString`.
+
/// `<str as ToOwned>::Owned` is `String`, and not `SmolStr`.
#[derive(Clone)]
pub enum CowStr<'s> {
Borrowed(&'s str),
-
Owned(CompactString),
+
Owned(SmolStr),
}
impl CowStr<'static> {
···
/// if the `compact_str` feature is disabled, or if the string is longer
/// than `MAX_INLINE_SIZE`.
pub fn copy_from_str(s: &str) -> Self {
-
Self::Owned(CompactString::from(s))
+
Self::Owned(SmolStr::from(s))
+
}
+
+
pub fn new_static(s: &'static str) -> Self {
+
Self::Owned(SmolStr::new_static(s))
}
}
···
#[inline]
pub fn from_utf8_owned(s: Vec<u8>) -> Result<Self, std::str::Utf8Error> {
-
Ok(Self::Owned(CompactString::from_utf8(s)?))
+
Ok(Self::Owned(SmolStr::new(std::str::from_utf8(&s)?)))
}
#[inline]
pub fn from_utf8_lossy(s: &'s [u8]) -> Self {
-
Self::Owned(CompactString::from_utf8_lossy(s))
+
Self::Owned(String::from_utf8_lossy(&s).into())
}
/// # Safety
···
/// This function is unsafe because it does not check that the bytes are valid UTF-8.
#[inline]
pub unsafe fn from_utf8_unchecked(s: &'s [u8]) -> Self {
-
unsafe { Self::Owned(CompactString::from_utf8_unchecked(s)) }
+
unsafe { Self::Owned(SmolStr::new(std::str::from_utf8_unchecked(s))) }
}
}
···
fn from(s: CowStr<'_>) -> Self {
match s {
CowStr::Borrowed(s) => s.into(),
-
CowStr::Owned(s) => s.into(),
+
CowStr::Owned(s) => String::from(s).into_boxed_str(),
}
}
}
+8
crates/jacquard-common/src/types.rs
···
pub mod aturi;
pub mod blob;
pub mod cid;
+
pub mod collection;
pub mod datetime;
pub mod did;
pub mod handle;
···
pub mod integer;
pub mod link;
pub mod nsid;
+
pub mod recordkey;
pub mod tid;
+
+
/// Trait for a constant string literal type
+
pub trait Literal: Clone + Copy + PartialEq + Eq + Send + Sync + 'static {
+
/// The string literal
+
const LITERAL: &'static str;
+
}
+200 -73
crates/jacquard-common/src/types/aturi.rs
···
+
use crate::CowStr;
+
use crate::types::ident::AtIdentifier;
+
use crate::types::nsid::Nsid;
+
use crate::types::recordkey::{RecordKey, Rkey};
+
use regex::Regex;
+
use serde::Serializer;
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::ToSmolStr;
use std::fmt;
use std::sync::LazyLock;
use std::{ops::Deref, str::FromStr};
-
use compact_str::ToCompactString;
-
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
/// at:// URI type
+
///
+
/// based on the regex here: https://github.com/bluesky-social/atproto/blob/main/packages/syntax/src/aturi_validation.ts
+
///
+
/// Doesn't support the query segment, but then neither does the Typescript SDK
+
///
+
/// TODO: support IntoStatic on string types. For composites like this where all borrow from (present) input,
+
/// perhaps use some careful unsafe to launder the lifetimes.
+
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+
pub struct AtUri<'u> {
+
uri: CowStr<'u>,
+
pub authority: AtIdentifier<'u>,
+
pub path: Option<UriPath<'u>>,
+
pub fragment: Option<CowStr<'u>>,
+
}
-
use crate::{CowStr, IntoStatic};
-
use regex::Regex;
+
/// at:// URI path component (current subset)
+
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+
pub struct UriPath<'u> {
+
pub collection: Nsid<'u>,
+
pub rkey: Option<RecordKey<Rkey<'u>>>,
+
}
-
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
-
#[serde(transparent)]
-
pub struct AtUri<'a>(CowStr<'a>);
+
pub type UriPathBuf = UriPath<'static>;
-
pub static AT_URI_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^$").unwrap());
+
pub static ATURI_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+
Regex::new(r##"^at://(?<authority>[a-zA-Z0-9._:%-]+)(/(?<collection>[a-zA-Z0-9-.]+)(/(?<rkey>[a-zA-Z0-9._~:@!$&%')(*+,;=-]+))?)?(#(?<fragment>/[a-zA-Z0-9._~:@!$&%')(*+,;=-[]/\]*))?$"##).unwrap()
+
});
-
impl<'a> AtUri<'a> {
+
impl<'u> AtUri<'u> {
/// Fallible constructor, validates, borrows from input
-
pub fn new(uri: &'a str) -> Result<Self, &'static str> {
-
if uri.len() > 2048 {
-
Err("AT_URI too long")
-
} else if !AT_URI_REGEX.is_match(uri) {
-
Err("Invalid AT_URI")
+
pub fn new(uri: &'u str) -> Result<Self, &'static str> {
+
if let Some(parts) = ATURI_REGEX.captures(uri) {
+
if let Some(authority) = parts.name("authority") {
+
let authority = AtIdentifier::new(authority.as_str())?;
+
let path = if let Some(collection) = parts.name("collection") {
+
let collection = Nsid::new(collection.as_str())?;
+
let rkey = if let Some(rkey) = parts.name("rkey") {
+
let rkey = RecordKey::from(Rkey::new(rkey.as_str())?);
+
Some(rkey)
+
} else {
+
None
+
};
+
Some(UriPath { collection, rkey })
+
} else {
+
None
+
};
+
let fragment = parts.name("fragment").map(|fragment| {
+
let fragment = CowStr::Borrowed(fragment.as_str());
+
fragment
+
});
+
Ok(AtUri {
+
uri: CowStr::Borrowed(uri),
+
authority,
+
path,
+
fragment,
+
})
+
} else {
+
Err("at:// URI missing authority")
+
}
} else {
-
Ok(Self(CowStr::Borrowed(uri)))
+
Err("Invalid at:// URI via regex")
}
}
-
/// Fallible constructor from an existing CowStr, clones and takes
-
pub fn from_cowstr(uri: CowStr<'a>) -> Result<AtUri<'a>, &'static str> {
-
if uri.len() > 2048 {
-
Err("AT_URI too long")
-
} else if !AT_URI_REGEX.is_match(&uri) {
-
Err("Invalid AT_URI")
+
pub fn new_owned(uri: impl AsRef<str>) -> Result<Self, &'static str> {
+
let uri = uri.as_ref();
+
if let Some(parts) = ATURI_REGEX.captures(uri) {
+
if let Some(authority) = parts.name("authority") {
+
let authority = AtIdentifier::new_owned(authority.as_str())?;
+
let path = if let Some(collection) = parts.name("collection") {
+
let collection = Nsid::new_owned(collection.as_str())?;
+
let rkey = if let Some(rkey) = parts.name("rkey") {
+
let rkey = RecordKey::from(Rkey::new_owned(rkey.as_str())?);
+
Some(rkey)
+
} else {
+
None
+
};
+
Some(UriPath { collection, rkey })
+
} else {
+
None
+
};
+
let fragment = parts.name("fragment").map(|fragment| {
+
let fragment = CowStr::Owned(fragment.as_str().to_smolstr());
+
fragment
+
});
+
Ok(AtUri {
+
uri: CowStr::Owned(uri.to_smolstr()),
+
authority,
+
path,
+
fragment,
+
})
+
} else {
+
Err("at:// URI missing authority")
+
}
} else {
-
Ok(Self(uri.into_static()))
+
Err("Invalid at:// URI via regex")
}
}
-
/// Infallible constructor for when you *know* the string slice is a valid at:// uri.
-
/// Will panic on invalid URIs. If you're manually decoding atproto records
-
/// or API values you know are valid (rather than using serde), this is the one to use.
-
/// The From<String> and From<CowStr> impls use the same logic.
-
pub fn raw(uri: &'a str) -> Self {
-
if uri.len() > 2048 {
-
panic!("AT_URI too long")
-
} else if !AT_URI_REGEX.is_match(uri) {
-
panic!("Invalid AT_URI")
+
pub fn new_static(uri: &'static str) -> Result<AtUri<'static>, &'static str> {
+
let uri = uri.as_ref();
+
if let Some(parts) = ATURI_REGEX.captures(uri) {
+
if let Some(authority) = parts.name("authority") {
+
let authority = AtIdentifier::new_static(authority.as_str())?;
+
let path = if let Some(collection) = parts.name("collection") {
+
let collection = Nsid::new_static(collection.as_str())?;
+
let rkey = if let Some(rkey) = parts.name("rkey") {
+
let rkey = RecordKey::from(Rkey::new_static(rkey.as_str())?);
+
Some(rkey)
+
} else {
+
None
+
};
+
Some(UriPath { collection, rkey })
+
} else {
+
None
+
};
+
let fragment = parts.name("fragment").map(|fragment| {
+
let fragment = CowStr::new_static(fragment.as_str());
+
fragment
+
});
+
Ok(AtUri {
+
uri: CowStr::new_static(uri),
+
authority,
+
path,
+
fragment,
+
})
+
} else {
+
Err("at:// URI missing authority")
+
}
} else {
-
Self(CowStr::Borrowed(uri))
+
Err("Invalid at:// URI via regex")
}
}
-
/// Infallible constructor for when you *know* the string is a valid AT_URI.
-
/// Marked unsafe because responsibility for upholding the invariant is on the developer.
-
pub unsafe fn unchecked(uri: &'a str) -> Self {
-
Self(CowStr::Borrowed(uri))
+
pub unsafe fn unchecked(uri: &'u str) -> Self {
+
if let Some(parts) = ATURI_REGEX.captures(uri) {
+
if let Some(authority) = parts.name("authority") {
+
let authority = unsafe { AtIdentifier::unchecked(authority.as_str()) };
+
let path = if let Some(collection) = parts.name("collection") {
+
let collection = unsafe { Nsid::unchecked(collection.as_str()) };
+
let rkey = if let Some(rkey) = parts.name("rkey") {
+
let rkey = RecordKey::from(unsafe { Rkey::unchecked(rkey.as_str()) });
+
Some(rkey)
+
} else {
+
None
+
};
+
Some(UriPath { collection, rkey })
+
} else {
+
None
+
};
+
let fragment = parts.name("fragment").map(|fragment| {
+
let fragment = CowStr::Borrowed(fragment.as_str());
+
fragment
+
});
+
AtUri {
+
uri: CowStr::Borrowed(uri),
+
authority,
+
path,
+
fragment,
+
}
+
} else {
+
Self {
+
uri: CowStr::Borrowed(uri),
+
authority: unsafe { AtIdentifier::unchecked(uri) },
+
path: None,
+
fragment: None,
+
}
+
}
+
} else {
+
Self {
+
uri: CowStr::Borrowed(uri),
+
authority: unsafe { AtIdentifier::unchecked(uri) },
+
path: None,
+
fragment: None,
+
}
+
}
}
pub fn as_str(&self) -> &str {
{
-
let this = &self.0;
+
let this = &self.uri;
this
}
}
···
type Err = &'static str;
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
-
/// Prefer `AtUri::new()` or `AtUri::raw` if you want to borrow.
+
/// Prefer `AtUri::new()` or `AtUri::raw()` if you want to borrow.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Self::from_cowstr(CowStr::Owned(s.to_compact_string()))
+
Self::new_owned(s)
}
}
-
impl<'ae> Deserialize<'ae> for AtUri<'ae> {
+
impl<'de> Deserialize<'de> for AtUri<'de> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
-
D: Deserializer<'ae>,
+
D: Deserializer<'de>,
{
let value = Deserialize::deserialize(deserializer)?;
Self::new(value).map_err(D::Error::custom)
}
}
-
impl fmt::Display for AtUri<'_> {
-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-
f.write_str(&self.0)
+
impl Serialize for AtUri<'_> {
+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+
where
+
S: Serializer,
+
{
+
serializer.serialize_str(&self.uri)
}
}
-
impl<'a> From<AtUri<'a>> for String {
-
fn from(value: AtUri<'a>) -> Self {
-
value.0.to_string()
+
impl fmt::Display for AtUri<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
f.write_str(&self.uri)
}
}
-
impl<'s> From<&'s AtUri<'_>> for &'s str {
-
fn from(value: &'s AtUri<'_>) -> Self {
-
value.0.as_ref()
+
impl<'d> From<AtUri<'d>> for String {
+
fn from(value: AtUri<'d>) -> Self {
+
value.uri.to_string()
}
}
-
impl<'a> From<AtUri<'a>> for CowStr<'a> {
-
fn from(value: AtUri<'a>) -> Self {
-
value.0
+
impl<'d> From<AtUri<'d>> for CowStr<'d> {
+
fn from(value: AtUri<'d>) -> Self {
+
value.uri
}
}
-
impl From<String> for AtUri<'static> {
-
fn from(value: String) -> Self {
-
if value.len() > 2048 {
-
panic!("AT_URI too long")
-
} else if !AT_URI_REGEX.is_match(&value) {
-
panic!("Invalid AT_URI")
-
} else {
-
Self(CowStr::Owned(value.to_compact_string()))
-
}
+
impl TryFrom<String> for AtUri<'static> {
+
type Error = &'static str;
+
+
fn try_from(value: String) -> Result<Self, Self::Error> {
+
Self::new_owned(&value)
}
}
-
impl<'a> From<CowStr<'a>> for AtUri<'a> {
-
fn from(value: CowStr<'a>) -> Self {
-
if value.len() > 2048 {
-
panic!("AT_URI too long")
-
} else if !AT_URI_REGEX.is_match(&value) {
-
panic!("Invalid AT_URI")
-
} else {
-
Self(value)
-
}
+
impl<'d> TryFrom<CowStr<'d>> for AtUri<'d> {
+
type Error = &'static str;
+
/// TODO: rewrite to avoid taking ownership/cloning
+
fn try_from(value: CowStr<'d>) -> Result<Self, Self::Error> {
+
Self::new_owned(value)
}
}
impl AsRef<str> for AtUri<'_> {
fn as_ref(&self) -> &str {
-
self.as_str()
+
&self.uri.as_ref()
}
}
···
type Target = str;
fn deref(&self) -> &Self::Target {
-
self.as_str()
+
self.uri.as_ref()
}
}
+23 -5
crates/jacquard-common/src/types/blob.rs
···
-
use crate::{CowStr, types::cid::Cid};
-
use compact_str::ToCompactString;
+
use crate::{CowStr, IntoStatic, types::cid::Cid};
#[allow(unused)]
use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error};
+
use smol_str::ToSmolStr;
+
use std::convert::Infallible;
#[allow(unused)]
use std::{
borrow::Cow,
···
/// Wrapper for file type
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(transparent)]
+
#[repr(transparent)]
pub struct MimeType<'m>(pub CowStr<'m>);
impl<'m> MimeType<'m> {
/// Fallible constructor, validates, borrows from input
pub fn new(mime_type: &'m str) -> Result<MimeType<'m>, &'static str> {
Ok(Self(CowStr::Borrowed(mime_type)))
+
}
+
+
pub fn new_owned(mime_type: impl AsRef<str>) -> Self {
+
Self(CowStr::Owned(mime_type.as_ref().to_smolstr()))
+
}
+
+
pub fn new_static(mime_type: &'static str) -> Self {
+
Self(CowStr::new_static(mime_type))
}
/// Fallible constructor from an existing CowStr, borrows
···
}
impl FromStr for MimeType<'_> {
-
type Err = &'static str;
+
type Err = Infallible;
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Self::from_cowstr(CowStr::Owned(s.to_compact_string()))
+
Ok(Self::new_owned(s))
+
}
+
}
+
+
impl IntoStatic for MimeType<'_> {
+
type Output = MimeType<'static>;
+
+
fn into_static(self) -> Self::Output {
+
MimeType(self.0.into_static())
}
}
···
impl From<String> for MimeType<'static> {
fn from(value: String) -> Self {
-
Self(CowStr::Owned(value.to_compact_string()))
+
Self(CowStr::Owned(value.to_smolstr()))
}
}
+21 -10
crates/jacquard-common/src/types/cid.rs
···
-
use std::{convert::Infallible, fmt, marker::PhantomData, ops::Deref, str::FromStr};
-
-
use compact_str::ToCompactString;
-
use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Visitor};
-
+
use crate::{CowStr, IntoStatic};
pub use cid::Cid as IpldCid;
-
-
use crate::CowStr;
+
use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Visitor};
+
use smol_str::ToSmolStr;
+
use std::{convert::Infallible, fmt, marker::PhantomData, ops::Deref, str::FromStr};
/// raw
pub const ATP_CID_CODEC: u64 = 0x55;
···
let s = CowStr::Owned(
cid.to_string_of_base(ATP_CID_BASE)
.unwrap_or_default()
-
.to_compact_string(),
+
.to_smolstr(),
);
Self::Ipld { cid, s }
}
···
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Ok(Cid::Str(CowStr::Owned(s.to_compact_string())))
+
Ok(Cid::Str(CowStr::Owned(s.to_smolstr())))
+
}
+
}
+
+
impl IntoStatic for Cid<'_> {
+
type Output = Cid<'static>;
+
+
fn into_static(self) -> Self::Output {
+
match self {
+
Cid::Ipld { cid, s } => Cid::Ipld {
+
cid,
+
s: s.into_static(),
+
},
+
Cid::Str(cow_str) => Cid::Str(cow_str.into_static()),
+
}
}
}
···
impl From<String> for Cid<'_> {
fn from(value: String) -> Self {
-
Cid::Str(CowStr::Owned(value.to_compact_string()))
+
Cid::Str(CowStr::Owned(value.to_smolstr()))
}
}
+52
crates/jacquard-common/src/types/collection.rs
···
+
use core::fmt;
+
+
use serde::{Serialize, de};
+
+
use crate::types::{
+
aturi::UriPath,
+
nsid::Nsid,
+
recordkey::{RecordKey, RecordKeyType, Rkey},
+
};
+
+
/// Trait for a collection of records that can be stored in a repository.
+
///
+
/// The records all have the same Lexicon schema.
+
pub trait Collection: fmt::Debug {
+
/// The NSID for the Lexicon that defines the schema of records in this collection.
+
const NSID: &'static str;
+
+
/// This collection's record type.
+
type Record: fmt::Debug + de::DeserializeOwned + Serialize;
+
+
/// Returns the [`Nsid`] for the Lexicon that defines the schema of records in this
+
/// collection.
+
///
+
/// This is a convenience method that parses [`Self::NSID`].
+
///
+
/// # Panics
+
///
+
/// Panics if [`Self::NSID`] is not a valid NSID.
+
///
+
/// [`Nsid`]: string::Nsid
+
fn nsid() -> crate::types::nsid::Nsid<'static> {
+
Nsid::new_static(Self::NSID).expect("should be valid NSID")
+
}
+
+
/// Returns the repo path for a record in this collection with the given record key.
+
///
+
/// Per the [Repo Data Structure v3] specification:
+
/// > Repo paths currently have a fixed structure of `<collection>/<record-key>`. This
+
/// > means a valid, normalized [`Nsid`], followed by a `/`, followed by a valid
+
/// > [`RecordKey`].
+
///
+
/// [Repo Data Structure v3]: https://atproto.com/specs/repository#repo-data-structure-v3
+
/// [`Nsid`]: string::Nsid
+
fn repo_path<'u, T: RecordKeyType>(
+
rkey: &'u crate::types::recordkey::RecordKey<T>,
+
) -> UriPath<'u> {
+
UriPath {
+
collection: Self::nsid(),
+
rkey: Some(RecordKey::from(Rkey::raw(rkey.as_ref()))),
+
}
+
}
+
}
+5 -6
crates/jacquard-common/src/types/datetime.rs
···
-
use std::sync::LazyLock;
-
use std::{cmp, str::FromStr};
-
use chrono::DurationRound;
-
use compact_str::ToCompactString;
use serde::Serializer;
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::ToSmolStr;
+
use std::sync::LazyLock;
+
use std::{cmp, str::FromStr};
use crate::{CowStr, IntoStatic};
use regex::Regex;
···
// This serialization format is compatible with ISO 8601.
let serialized = CowStr::Owned(
dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true)
-
.to_compact_string(),
+
.to_smolstr(),
);
Self { serialized, dt }
}
···
if ISO8601_REGEX.is_match(&value) {
let dt = chrono::DateTime::parse_from_rfc3339(&value)?;
Ok(Self {
-
serialized: CowStr::Owned(value.to_compact_string()),
+
serialized: CowStr::Owned(value.to_smolstr()),
dt,
})
} else {
+53 -14
crates/jacquard-common/src/types/did.rs
···
+
use crate::{CowStr, IntoStatic};
+
use regex::Regex;
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::ToSmolStr;
use std::fmt;
use std::sync::LazyLock;
use std::{ops::Deref, str::FromStr};
-
use compact_str::ToCompactString;
-
use serde::{Deserialize, Deserializer, Serialize, de::Error};
-
-
use crate::{CowStr, IntoStatic};
-
use regex::Regex;
-
-
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
+
#[derive(Clone, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
+
#[repr(transparent)]
pub struct Did<'d>(CowStr<'d>);
pub static DID_REGEX: LazyLock<Regex> =
···
impl<'d> Did<'d> {
/// Fallible constructor, validates, borrows from input
pub fn new(did: &'d str) -> Result<Self, &'static str> {
+
let did = did.strip_prefix("at://").unwrap_or(did);
if did.len() > 2048 {
Err("DID too long")
} else if !DID_REGEX.is_match(did) {
···
}
}
-
/// Fallible constructor from an existing CowStr, takes ownership
-
pub fn from_cowstr(did: CowStr<'d>) -> Result<Did<'d>, &'static str> {
+
/// Fallible constructor, validates, takes ownership
+
pub fn new_owned(did: impl AsRef<str>) -> Result<Self, &'static str> {
+
let did = did.as_ref();
+
let did = did.strip_prefix("at://").unwrap_or(did);
if did.len() > 2048 {
Err("DID too long")
-
} else if !DID_REGEX.is_match(&did) {
+
} else if !DID_REGEX.is_match(did) {
+
Err("Invalid DID")
+
} else {
+
Ok(Self(CowStr::Owned(did.to_smolstr())))
+
}
+
}
+
+
/// Fallible constructor, validates, doesn't allocate
+
pub fn new_static(did: &'static str) -> Result<Self, &'static str> {
+
let did = did.strip_prefix("at://").unwrap_or(did);
+
if did.len() > 2048 {
+
Err("DID too long")
+
} else if !DID_REGEX.is_match(did) {
Err("Invalid DID")
} else {
-
Ok(Self(did.into_static()))
+
Ok(Self(CowStr::new_static(did)))
}
}
···
/// or API values you know are valid (rather than using serde), this is the one to use.
/// The From<String> and From<CowStr> impls use the same logic.
pub fn raw(did: &'d str) -> Self {
+
let did = did.strip_prefix("at://").unwrap_or(did);
if did.len() > 2048 {
panic!("DID too long")
} else if !DID_REGEX.is_match(did) {
···
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
/// Prefer `Did::new()` or `Did::raw` if you want to borrow.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Self::from_cowstr(CowStr::Borrowed(s).into_static())
+
Self::new_owned(s)
+
}
+
}
+
+
impl IntoStatic for Did<'_> {
+
type Output = Did<'static>;
+
+
fn into_static(self) -> Self::Output {
+
Did(self.0.into_static())
}
}
···
}
}
+
impl fmt::Debug for Did<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
write!(f, "at://{}", self.0)
+
}
+
}
+
impl<'d> From<Did<'d>> for String {
fn from(value: Did<'d>) -> Self {
value.0.to_string()
···
impl From<String> for Did<'static> {
fn from(value: String) -> Self {
+
let value = if let Some(did) = value.strip_prefix("at://") {
+
CowStr::Borrowed(did)
+
} else {
+
value.into()
+
};
if value.len() > 2048 {
panic!("DID too long")
} else if !DID_REGEX.is_match(&value) {
panic!("Invalid DID")
} else {
-
Self(CowStr::Owned(value.to_compact_string()))
+
Self(value.into_static())
}
}
}
impl<'d> From<CowStr<'d>> for Did<'d> {
fn from(value: CowStr<'d>) -> Self {
+
let value = if let Some(did) = value.strip_prefix("at://") {
+
CowStr::Borrowed(did)
+
} else {
+
value
+
};
if value.len() > 2048 {
panic!("DID too long")
} else if !DID_REGEX.is_match(&value) {
panic!("Invalid DID")
} else {
-
Self(value)
+
Self(value.into_static())
}
}
}
+88 -30
crates/jacquard-common/src/types/handle.rs
···
+
use crate::{CowStr, IntoStatic};
+
use regex::Regex;
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::ToSmolStr;
use std::fmt;
use std::sync::LazyLock;
use std::{ops::Deref, str::FromStr};
-
use compact_str::ToCompactString;
-
use serde::{Deserialize, Deserializer, Serialize, de::Error};
-
-
use crate::{CowStr, IntoStatic};
-
use regex::Regex;
-
-
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Hash)]
+
#[derive(Clone, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
+
#[repr(transparent)]
pub struct Handle<'h>(CowStr<'h>);
pub static HANDLE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$").unwrap()
});
+
/// AT Protocol handle
impl<'h> Handle<'h> {
/// Fallible constructor, validates, borrows from input
///
/// Accepts (and strips) preceding '@' if present
pub fn new(handle: &'h str) -> Result<Self, &'static str> {
-
let handle = handle.strip_prefix('@').unwrap_or(handle);
-
if handle.len() > 2048 {
+
let handle = handle
+
.strip_prefix("at://")
+
.unwrap_or(handle)
+
.strip_prefix('@')
+
.unwrap_or(handle);
+
if handle.len() > 253 {
Err("handle too long")
} else if !HANDLE_REGEX.is_match(handle) {
Err("Invalid handle")
···
}
}
-
/// Fallible constructor from an existing CowStr, takes ownership
-
///
-
/// Accepts (and strips) preceding '@' if present
-
pub fn from_cowstr(handle: CowStr<'h>) -> Result<Handle<'h>, &'static str> {
-
let handle = if let Some(handle) = handle.strip_prefix('@') {
-
CowStr::Borrowed(handle)
-
} else {
-
handle
-
};
-
if handle.len() > 2048 {
+
/// Fallible constructor, validates, takes ownership
+
pub fn new_owned(handle: impl AsRef<str>) -> Result<Self, &'static str> {
+
let handle = handle.as_ref();
+
let handle = handle
+
.strip_prefix("at://")
+
.unwrap_or(handle)
+
.strip_prefix('@')
+
.unwrap_or(handle);
+
if handle.len() > 253 {
Err("handle too long")
-
} else if !HANDLE_REGEX.is_match(&handle) {
+
} else if !HANDLE_REGEX.is_match(handle) {
Err("Invalid handle")
} else {
-
Ok(Self(handle.into_static()))
+
Ok(Self(CowStr::Owned(handle.to_smolstr())))
}
}
+
/// Fallible constructor, validates, doesn't allocate
+
pub fn new_static(handle: &'static str) -> Result<Self, &'static str> {
+
let handle = handle
+
.strip_prefix("at://")
+
.unwrap_or(handle)
+
.strip_prefix('@')
+
.unwrap_or(handle);
+
if handle.len() > 253 {
+
Err("handle too long")
+
} else if !HANDLE_REGEX.is_match(handle) {
+
Err("Invalid handle")
+
} else {
+
Ok(Self(CowStr::new_static(handle)))
+
}
+
}
/// Infallible constructor for when you *know* the string is a valid handle.
/// Will panic on invalid handles. If you're manually decoding atproto records
/// or API values you know are valid (rather than using serde), this is the one to use.
···
///
/// Accepts (and strips) preceding '@' if present
pub fn raw(handle: &'h str) -> Self {
-
let handle = handle.strip_prefix('@').unwrap_or(handle);
-
if handle.len() > 2048 {
+
let handle = handle
+
.strip_prefix("at://")
+
.unwrap_or(handle)
+
.strip_prefix('@')
+
.unwrap_or(handle);
+
if handle.len() > 253 {
panic!("handle too long")
} else if !HANDLE_REGEX.is_match(handle) {
panic!("Invalid handle")
···
///
/// Accepts (and strips) preceding '@' if present
pub unsafe fn unchecked(handle: &'h str) -> Self {
-
let handle = handle.strip_prefix('@').unwrap_or(handle);
+
let handle = handle
+
.strip_prefix("at://")
+
.unwrap_or(handle)
+
.strip_prefix('@')
+
.unwrap_or(handle);
Self(CowStr::Borrowed(handle))
}
···
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
/// Prefer `Handle::new()` or `Handle::raw` if you want to borrow.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Self::from_cowstr(CowStr::Borrowed(s).into_static())
+
Self::new_owned(s)
+
}
+
}
+
+
impl IntoStatic for Handle<'_> {
+
type Output = Handle<'static>;
+
+
fn into_static(self) -> Self::Output {
+
Handle(self.0.into_static())
}
}
···
impl fmt::Display for Handle<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-
write!(f, "@{}", self.0)
+
write!(f, "{}", self.0)
+
}
+
}
+
+
impl fmt::Debug for Handle<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
write!(f, "at://{}", self.0)
}
}
···
impl From<String> for Handle<'static> {
fn from(value: String) -> Self {
-
if value.len() > 2048 {
+
let value = if let Some(handle) = value
+
.strip_prefix("at://")
+
.unwrap_or(&value)
+
.strip_prefix('@')
+
{
+
CowStr::Borrowed(handle)
+
} else {
+
value.into()
+
};
+
if value.len() > 253 {
panic!("handle too long")
} else if !HANDLE_REGEX.is_match(&value) {
panic!("Invalid handle")
} else {
-
Self(CowStr::Owned(value.to_compact_string()))
+
Self(value.into_static())
}
}
}
impl<'h> From<CowStr<'h>> for Handle<'h> {
fn from(value: CowStr<'h>) -> Self {
-
if value.len() > 2048 {
+
let value = if let Some(handle) = value
+
.strip_prefix("at://")
+
.unwrap_or(&value)
+
.strip_prefix('@')
+
{
+
CowStr::Borrowed(handle)
+
} else {
+
value
+
};
+
if value.len() > 253 {
panic!("handle too long")
} else if !HANDLE_REGEX.is_match(&value) {
panic!("Invalid handle")
} else {
-
Self(value)
+
Self(value.into_static())
}
}
}
+26 -5
crates/jacquard-common/src/types/ident.rs
···
-
use crate::types::did::Did;
use crate::types::handle::Handle;
+
use crate::{IntoStatic, types::did::Did};
use std::fmt;
use std::str::FromStr;
···
}
}
-
/// Fallible constructor from an existing CowStr, borrows
-
pub fn from_cowstr(ident: CowStr<'i>) -> Result<AtIdentifier<'i>, &'static str> {
-
if let Ok(did) = ident.parse() {
+
/// Fallible constructor, validates, takes ownership
+
pub fn new_owned(ident: impl AsRef<str>) -> Result<Self, &'static str> {
+
let ident = ident.as_ref();
+
if let Ok(did) = Did::new_owned(ident) {
Ok(AtIdentifier::Did(did))
} else {
-
ident.parse().map(AtIdentifier::Handle)
+
Ok(AtIdentifier::Handle(Handle::new_owned(ident)?))
+
}
+
}
+
+
/// Fallible constructor, validates, doesn't allocate
+
pub fn new_static(ident: &'static str) -> Result<AtIdentifier<'static>, &'static str> {
+
if let Ok(did) = Did::new_static(ident) {
+
Ok(AtIdentifier::Did(did))
+
} else {
+
Ok(AtIdentifier::Handle(Handle::new_static(ident)?))
}
}
···
Ok(AtIdentifier::Did(did))
} else {
s.parse().map(AtIdentifier::Handle)
+
}
+
}
+
}
+
+
impl IntoStatic for AtIdentifier<'_> {
+
type Output = AtIdentifier<'static>;
+
+
fn into_static(self) -> Self::Output {
+
match self {
+
AtIdentifier::Did(did) => AtIdentifier::Did(did.into_static()),
+
AtIdentifier::Handle(handle) => AtIdentifier::Handle(handle.into_static()),
}
}
}
+209
crates/jacquard-common/src/types/nsid.rs
···
+
use crate::types::recordkey::RecordKeyType;
+
use crate::{CowStr, IntoStatic};
+
use regex::Regex;
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::{SmolStr, ToSmolStr};
+
use std::fmt;
+
use std::sync::LazyLock;
+
use std::{ops::Deref, str::FromStr};
+
/// Namespaced Identifier (NSID)
+
///
+
/// Stored as SmolStr to ease lifetime issues and because, despite the fact that NSIDs *can* be 317 characters, most are quite short
+
/// TODO: consider if this should go back to CowStr, or be broken up into segments
+
#[derive(Clone, PartialEq, Eq, Serialize, Hash)]
+
#[serde(transparent)]
+
#[repr(transparent)]
+
pub struct Nsid<'n>(CowStr<'n>);
+
+
pub static NSID_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+
Regex::new(r"^[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(\.[a-zA-Z][a-zA-Z0-9]{0,62})$").unwrap()
+
});
+
+
impl<'n> Nsid<'n> {
+
/// Fallible constructor, validates, borrows from input
+
pub fn new(nsid: &'n str) -> Result<Self, &'static str> {
+
if nsid.len() > 317 {
+
Err("NSID too long")
+
} else if !NSID_REGEX.is_match(nsid) {
+
Err("Invalid NSID")
+
} else {
+
Ok(Self(CowStr::Borrowed(nsid)))
+
}
+
}
+
+
/// Fallible constructor, validates, borrows from input
+
pub fn new_owned(nsid: impl AsRef<str>) -> Result<Self, &'static str> {
+
let nsid = nsid.as_ref();
+
if nsid.len() > 317 {
+
Err("NSID too long")
+
} else if !NSID_REGEX.is_match(nsid) {
+
Err("Invalid NSID")
+
} else {
+
Ok(Self(CowStr::Owned(nsid.to_smolstr())))
+
}
+
}
+
+
/// Fallible constructor, validates, doesn't allocate
+
pub fn new_static(nsid: &'static str) -> Result<Self, &'static str> {
+
if nsid.len() > 317 {
+
Err("NSID too long")
+
} else if !NSID_REGEX.is_match(nsid) {
+
Err("Invalid NSID")
+
} else {
+
Ok(Self(CowStr::new_static(nsid)))
+
}
+
}
+
+
/// Infallible constructor for when you *know* the string is a valid NSID.
+
/// Will panic on invalid NSIDs. If you're manually decoding atproto records
+
/// or API values you know are valid (rather than using serde), this is the one to use.
+
/// The From<String> and From<CowStr> impls use the same logic.
+
pub fn raw(nsid: &'n str) -> Self {
+
if nsid.len() > 317 {
+
panic!("NSID too long")
+
} else if !NSID_REGEX.is_match(nsid) {
+
panic!("Invalid NSID")
+
} else {
+
Self(CowStr::Borrowed(nsid))
+
}
+
}
+
+
/// Infallible constructor for when you *know* the string is a valid NSID.
+
/// Marked unsafe because responsibility for upholding the invariant is on the developer.
+
pub unsafe fn unchecked(nsid: &'n str) -> Self {
+
Self(CowStr::Borrowed(nsid))
+
}
+
+
/// Returns the domain authority part of the NSID.
+
pub fn domain_authority(&self) -> &str {
+
let split = self.0.rfind('.').expect("enforced by constructor");
+
&self.0[..split]
+
}
+
+
/// Returns the name segment of the NSID.
+
pub fn name(&self) -> &str {
+
let split = self.0.rfind('.').expect("enforced by constructor");
+
&self.0[split + 1..]
+
}
+
+
pub fn as_str(&self) -> &str {
+
{
+
let this = &self.0;
+
this
+
}
+
}
+
}
+
+
impl<'n> FromStr for Nsid<'n> {
+
type Err = &'static str;
+
+
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
+
/// Prefer `Nsid::new()` or `Nsid::raw` if you want to borrow.
+
fn from_str(s: &str) -> Result<Self, Self::Err> {
+
Self::new_owned(s)
+
}
+
}
+
+
impl IntoStatic for Nsid<'_> {
+
type Output = Nsid<'static>;
+
+
fn into_static(self) -> Self::Output {
+
Nsid(self.0.into_static())
+
}
+
}
+
+
impl<'de> Deserialize<'de> for Nsid<'de> {
+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+
where
+
D: Deserializer<'de>,
+
{
+
let value: &str = Deserialize::deserialize(deserializer)?;
+
Self::new(value).map_err(D::Error::custom)
+
}
+
}
+
+
impl fmt::Display for Nsid<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
f.write_str(&self.0)
+
}
+
}
+
+
impl fmt::Debug for Nsid<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
write!(f, "at://{}", self.0)
+
}
+
}
+
+
impl<'n> From<Nsid<'n>> for String {
+
fn from(value: Nsid) -> Self {
+
value.0.to_string()
+
}
+
}
+
+
impl<'n> From<Nsid<'n>> for CowStr<'n> {
+
fn from(value: Nsid<'n>) -> Self {
+
value.0
+
}
+
}
+
+
impl From<Nsid<'_>> for SmolStr {
+
fn from(value: Nsid) -> Self {
+
value.0.to_smolstr()
+
}
+
}
+
+
impl<'n> From<String> for Nsid<'n> {
+
fn from(value: String) -> Self {
+
if value.len() > 317 {
+
panic!("NSID too long")
+
} else if !NSID_REGEX.is_match(&value) {
+
panic!("Invalid NSID")
+
} else {
+
Self(CowStr::Owned(value.to_smolstr()))
+
}
+
}
+
}
+
+
impl<'n> From<CowStr<'n>> for Nsid<'n> {
+
fn from(value: CowStr<'n>) -> Self {
+
if value.len() > 317 {
+
panic!("NSID too long")
+
} else if !NSID_REGEX.is_match(&value) {
+
panic!("Invalid NSID")
+
} else {
+
Self(value)
+
}
+
}
+
}
+
+
impl From<SmolStr> for Nsid<'_> {
+
fn from(value: SmolStr) -> Self {
+
if value.len() > 317 {
+
panic!("NSID too long")
+
} else if !NSID_REGEX.is_match(&value) {
+
panic!("Invalid NSID")
+
} else {
+
Self(CowStr::Owned(value))
+
}
+
}
+
}
+
+
impl AsRef<str> for Nsid<'_> {
+
fn as_ref(&self) -> &str {
+
self.as_str()
+
}
+
}
+
+
impl Deref for Nsid<'_> {
+
type Target = str;
+
+
fn deref(&self) -> &Self::Target {
+
self.as_str()
+
}
+
}
+
+
unsafe impl RecordKeyType for Nsid<'_> {
+
fn as_str(&self) -> &str {
+
self.as_str()
+
}
+
}
+402
crates/jacquard-common/src/types/recordkey.rs
···
+
use crate::types::Literal;
+
use crate::{CowStr, IntoStatic};
+
use regex::Regex;
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::{SmolStr, ToSmolStr};
+
use std::fmt;
+
use std::marker::PhantomData;
+
use std::sync::LazyLock;
+
use std::{ops::Deref, str::FromStr};
+
+
/// Trait for generic typed record keys
+
///
+
/// This is deliberately public (so that consumers can develop specialized record key types),
+
/// but is marked as unsafe, because the implementer is expected to uphold the invariants
+
/// required by this trait, namely compliance with the [spec](https://atproto.com/specs/record-key)
+
/// as described by [`RKEY_REGEX`](RKEY_REGEX).
+
///
+
/// This crate provides implementations for TID, NSID, literals, and generic strings
+
pub unsafe trait RecordKeyType: Clone + Serialize {
+
fn as_str(&self) -> &str;
+
}
+
+
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash, Debug)]
+
#[serde(transparent)]
+
#[repr(transparent)]
+
pub struct RecordKey<T: RecordKeyType>(pub T);
+
+
impl<T> From<T> for RecordKey<Rkey<'_>>
+
where
+
T: RecordKeyType,
+
{
+
fn from(value: T) -> Self {
+
RecordKey(Rkey::from_str(value.as_str()).expect("Invalid rkey"))
+
}
+
}
+
+
impl<T> AsRef<str> for RecordKey<T>
+
where
+
T: RecordKeyType,
+
{
+
fn as_ref(&self) -> &str {
+
self.0.as_str()
+
}
+
}
+
+
impl<T> IntoStatic for RecordKey<T>
+
where
+
T: IntoStatic + RecordKeyType,
+
T::Output: RecordKeyType,
+
{
+
type Output = RecordKey<T::Output>;
+
+
fn into_static(self) -> Self::Output {
+
RecordKey(self.0.into_static())
+
}
+
}
+
+
/// ATProto Record Key (type `any`)
+
/// Catch-all for any string meeting the overall Record Key requirements detailed https://atproto.com/specs/record-key
+
#[derive(Clone, PartialEq, Eq, Serialize, Hash)]
+
#[serde(transparent)]
+
#[repr(transparent)]
+
pub struct Rkey<'r>(CowStr<'r>);
+
+
unsafe impl<'r> RecordKeyType for Rkey<'r> {
+
fn as_str(&self) -> &str {
+
self.0.as_ref()
+
}
+
}
+
+
pub static RKEY_REGEX: LazyLock<Regex> =
+
LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9.\-_:~]{1,512}$").unwrap());
+
+
/// AT Protocol rkey
+
impl<'r> Rkey<'r> {
+
/// Fallible constructor, validates, borrows from input
+
pub fn new(rkey: &'r str) -> Result<Self, &'static str> {
+
if [".", ".."].contains(&rkey) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self(CowStr::Borrowed(rkey)))
+
}
+
}
+
+
/// Fallible constructor, validates, borrows from input
+
pub fn new_owned(rkey: impl AsRef<str>) -> Result<Self, &'static str> {
+
let rkey = rkey.as_ref();
+
if [".", ".."].contains(&rkey) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self(CowStr::Owned(rkey.to_smolstr())))
+
}
+
}
+
+
/// Fallible constructor, validates, doesn't allocate
+
pub fn new_static(rkey: &'static str) -> Result<Self, &'static str> {
+
if [".", ".."].contains(&rkey) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self(CowStr::new_static(rkey)))
+
}
+
}
+
+
/// Infallible constructor for when you *know* the string is a valid rkey.
+
/// Will panic on invalid rkeys. If you're manually decoding atproto records
+
/// or API values you know are valid (rather than using serde), this is the one to use.
+
/// The From impls use the same logic.
+
pub fn raw(rkey: &'r str) -> Self {
+
if [".", ".."].contains(&rkey) {
+
panic!("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
panic!("Invalid rkey")
+
} else {
+
Self(CowStr::Borrowed(rkey))
+
}
+
}
+
+
/// Infallible constructor for when you *know* the string is a valid rkey.
+
/// Marked unsafe because responsibility for upholding the invariant is on the developer.
+
pub unsafe fn unchecked(rkey: &'r str) -> Self {
+
Self(CowStr::Borrowed(rkey))
+
}
+
+
pub fn as_str(&self) -> &str {
+
{
+
let this = &self.0;
+
this
+
}
+
}
+
}
+
+
impl<'r> FromStr for Rkey<'r> {
+
type Err = &'static str;
+
+
fn from_str(s: &str) -> Result<Self, Self::Err> {
+
if [".", ".."].contains(&s) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(s) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self(CowStr::Owned(s.to_smolstr())))
+
}
+
}
+
}
+
+
impl IntoStatic for Rkey<'_> {
+
type Output = Rkey<'static>;
+
+
fn into_static(self) -> Self::Output {
+
Rkey(self.0.into_static())
+
}
+
}
+
+
impl<'de> Deserialize<'de> for Rkey<'de> {
+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+
where
+
D: Deserializer<'de>,
+
{
+
let value: &str = Deserialize::deserialize(deserializer)?;
+
Self::new(value).map_err(D::Error::custom)
+
}
+
}
+
+
impl fmt::Display for Rkey<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
f.write_str(&self.0)
+
}
+
}
+
+
impl fmt::Debug for Rkey<'_> {
+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+
write!(f, "record-key:{}", self.0)
+
}
+
}
+
+
impl From<Rkey<'_>> for String {
+
fn from(value: Rkey<'_>) -> Self {
+
value.0.to_string()
+
}
+
}
+
+
impl<'r> From<Rkey<'r>> for CowStr<'r> {
+
fn from(value: Rkey<'r>) -> Self {
+
value.0
+
}
+
}
+
+
impl<'r> From<Rkey<'r>> for SmolStr {
+
fn from(value: Rkey) -> Self {
+
value.0.to_smolstr()
+
}
+
}
+
+
impl<'r> From<String> for Rkey<'r> {
+
fn from(value: String) -> Self {
+
if [".", ".."].contains(&value.as_str()) {
+
panic!("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(&value) {
+
panic!("Invalid rkey")
+
} else {
+
Self(CowStr::Owned(value.to_smolstr()))
+
}
+
}
+
}
+
+
impl<'r> From<CowStr<'r>> for Rkey<'r> {
+
fn from(value: CowStr<'r>) -> Self {
+
if [".", ".."].contains(&value.as_ref()) {
+
panic!("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(&value) {
+
panic!("Invalid rkey")
+
} else {
+
Self(value)
+
}
+
}
+
}
+
+
impl AsRef<str> for Rkey<'_> {
+
fn as_ref(&self) -> &str {
+
self.as_str()
+
}
+
}
+
+
impl Deref for Rkey<'_> {
+
type Target = str;
+
+
fn deref(&self) -> &Self::Target {
+
self.0.as_ref()
+
}
+
}
+
+
/// ATProto Record Key (type `literal:<value>`)
+
/// Zero-sized type, literal is associated constant of type parameter
+
///
+
/// TODO: macro to construct arbitrary ones of these and the associated marker struct
+
#[derive(Clone, PartialEq, Eq, Serialize, Hash)]
+
pub struct LiteralKey<T: Literal = SelfRecord> {
+
literal: PhantomData<T>,
+
}
+
+
/// Marker type whose [`Literal`] value is `"self"`.
///
/// It is the default type parameter of `LiteralKey`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SelfRecord;
+
+
/// Binds the `SelfRecord` marker to the literal text `self`.
impl Literal for SelfRecord {
    const LITERAL: &'static str = "self";
}
+
+
/// A literal key always reports its type parameter's literal as the key text.
unsafe impl<T: Literal> RecordKeyType for LiteralKey<T> {
    fn as_str(&self) -> &str {
        let literal: &'static str = T::LITERAL;
        literal
    }
}
+
+
/// AT Protocol rkey
+
impl<T: Literal> LiteralKey<T> {
+
/// Fallible constructor, validates, borrows from input
+
pub fn new(rkey: impl AsRef<str>) -> Result<Self, &'static str> {
+
let rkey = rkey.as_ref();
+
if !rkey.eq_ignore_ascii_case(T::LITERAL) {
+
Err("Invalid literal rkey - does not match literal")
+
} else if [".", ".."].contains(&rkey) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self {
+
literal: PhantomData,
+
})
+
}
+
}
+
+
/// Infallible constructor for when you *know* the string is a valid rkey.
+
/// Will panic on invalid rkeys. If you're manually decoding atproto records
+
/// or API values you know are valid (rather than using serde), this is the one to use.
+
/// The From<String> and From<CowStr> impls use the same logic.
+
pub fn raw(rkey: &str) -> Self {
+
if !rkey.eq_ignore_ascii_case(T::LITERAL) {
+
panic!(
+
"Invalid literal rkey - does not match literal {}",
+
T::LITERAL
+
)
+
} else if [".", ".."].contains(&rkey.as_ref()) {
+
panic!("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(rkey) {
+
panic!("Invalid rkey")
+
} else {
+
Self {
+
literal: PhantomData,
+
}
+
}
+
}
+
+
/// Infallible type constructor
+
///
+
/// # Safety
+
/// Does not validate that the literal is a valid record key
+
pub unsafe fn t() -> Self {
+
Self {
+
literal: PhantomData,
+
}
+
}
+
+
pub fn as_str(&self) -> &str {
+
T::LITERAL
+
}
+
}
+
+
impl<T: Literal> FromStr for LiteralKey<T> {
+
type Err = &'static str;
+
+
fn from_str(s: &str) -> Result<Self, Self::Err> {
+
Self::new(s)
+
}
+
}
+
+
impl<'de, T: Literal> Deserialize<'de> for LiteralKey<T> {
+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+
where
+
D: Deserializer<'de>,
+
{
+
let value: &str = Deserialize::deserialize(deserializer)?;
+
Self::new(value).map_err(D::Error::custom)
+
}
+
}
+
+
/// Displays the literal text.
impl<T: Literal> fmt::Display for LiteralKey<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let literal: &'static str = T::LITERAL;
        f.write_str(literal)
    }
}
+
+
/// Debug output is the literal prefixed with `literal:`.
impl<T: Literal> fmt::Debug for LiteralKey<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("literal:{}", T::LITERAL))
    }
}
+
+
/// Produces the literal text as a new `String`.
///
/// The key carries no data, so the result depends only on `T::LITERAL`.
impl<T: Literal> From<LiteralKey<T>> for String {
    fn from(_value: LiteralKey<T>) -> Self {
        T::LITERAL.to_string()
    }
}
+
+
impl<'r, T: Literal> From<LiteralKey<T>> for CowStr<'r> {
+
fn from(_value: LiteralKey<T>) -> Self {
+
CowStr::Borrowed(T::LITERAL)
+
}
+
}
+
+
impl<T: Literal> TryFrom<String> for LiteralKey<T> {
+
type Error = &'static str;
+
fn try_from(value: String) -> Result<Self, Self::Error> {
+
if !value.eq_ignore_ascii_case(T::LITERAL) {
+
Err("Invalid literal rkey - does not match literal")
+
} else if [".", ".."].contains(&value.as_str()) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(&value) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self {
+
literal: PhantomData,
+
})
+
}
+
}
+
}
+
+
impl<'r, T: Literal> TryFrom<CowStr<'r>> for LiteralKey<T> {
+
type Error = &'static str;
+
fn try_from(value: CowStr<'r>) -> Result<Self, Self::Error> {
+
if !value.eq_ignore_ascii_case(T::LITERAL) {
+
Err("Invalid literal rkey - does not match literal")
+
} else if [".", ".."].contains(&value.as_ref()) {
+
Err("Disallowed rkey")
+
} else if !RKEY_REGEX.is_match(&value) {
+
Err("Invalid rkey")
+
} else {
+
Ok(Self {
+
literal: PhantomData,
+
})
+
}
+
}
+
}
+
+
impl<T: Literal> AsRef<str> for LiteralKey<T> {
+
fn as_ref(&self) -> &str {
+
self.as_str()
+
}
+
}
+
+
impl<T: Literal> Deref for LiteralKey<T> {
+
type Target = str;
+
+
fn deref(&self) -> &Self::Target {
+
self.as_str()
+
}
+
}
+38 -43
crates/jacquard-common/src/types/tid.rs
···
+
use serde::{Deserialize, Deserializer, Serialize, de::Error};
+
use smol_str::{SmolStr, SmolStrBuilder};
use std::fmt;
use std::sync::LazyLock;
use std::{ops::Deref, str::FromStr};
-
use compact_str::{CompactString, ToCompactString};
-
use serde::{Deserialize, Deserializer, Serialize, de::Error};
-
+
use crate::CowStr;
use crate::types::integer::LimitedU32;
-
use crate::{CowStr, IntoStatic};
use regex::Regex;
-
fn s32_encode(mut i: u64) -> CowStr<'static> {
+
fn s32_encode(mut i: u64) -> SmolStr {
const S32_CHAR: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";
-
let mut s = CompactString::with_capacity(13);
+
let mut s = SmolStrBuilder::new();
for _ in 0..13 {
let c = i & 0x1F;
s.push(S32_CHAR[c as usize] as char);
···
i >>= 5;
}
-
// Reverse the string to convert it to big-endian format.
-
CowStr::Owned(s.chars().rev().collect())
+
let mut builder = SmolStrBuilder::new();
+
for c in s.finish().chars().rev() {
+
builder.push(c);
+
}
+
builder.finish()
}
static TID_REGEX: LazyLock<Regex> = LazyLock::new(|| {
···
/// [Timestamp Identifier]: https://atproto.com/specs/tid
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Hash)]
#[serde(transparent)]
-
pub struct Tid<'t>(CowStr<'t>);
+
#[repr(transparent)]
+
pub struct Tid(SmolStr);
-
impl<'t> Tid<'t> {
+
impl Tid {
/// Parses a `TID` from the given string.
-
pub fn new(tid: &'t str) -> Result<Self, &'static str> {
-
if tid.len() != 13 {
-
Err("TID must be 13 characters")
-
} else if !TID_REGEX.is_match(&tid) {
-
Err("Invalid TID")
-
} else {
-
Ok(Self(CowStr::Owned(tid.to_compact_string())))
-
}
-
}
-
-
/// Fallible constructor from an existing CowStr, takes ownership
-
pub fn from_cowstr(tid: CowStr<'t>) -> Result<Tid<'t>, &'static str> {
+
pub fn new(tid: impl AsRef<str>) -> Result<Self, &'static str> {
+
let tid = tid.as_ref();
if tid.len() != 13 {
Err("TID must be 13 characters")
-
} else if !TID_REGEX.is_match(&tid) {
+
} else if !TID_REGEX.is_match(&tid.as_ref()) {
Err("Invalid TID")
} else {
-
Ok(Self(tid.into_static()))
+
Ok(Self(SmolStr::new_inline(&tid)))
}
}
···
/// Will panic on invalid TID. If you're manually decoding atproto records
/// or API values you know are valid (rather than using serde), this is the one to use.
/// The From<String> and From<CowStr> impls use the same logic.
-
pub fn raw(tid: &'t str) -> Self {
+
pub fn raw(tid: impl AsRef<str>) -> Self {
+
let tid = tid.as_ref();
if tid.len() != 13 {
panic!("TID must be 13 characters")
} else if !TID_REGEX.is_match(&tid) {
panic!("Invalid TID")
} else {
-
Self(CowStr::Borrowed(tid))
+
Self(SmolStr::new_inline(tid))
}
}
/// Infallible constructor for when you *know* the string is a valid TID.
/// Marked unsafe because responsibility for upholding the invariant is on the developer.
-
pub unsafe fn unchecked(tid: &'t str) -> Self {
-
Self(CowStr::Borrowed(tid))
+
pub unsafe fn unchecked(tid: impl AsRef<str>) -> Self {
+
let tid = tid.as_ref();
+
Self(SmolStr::new_inline(tid))
}
/// Construct a new timestamp with the specified clock ID.
···
}
}
-
impl FromStr for Tid<'_> {
+
impl FromStr for Tid {
type Err = &'static str;
/// Has to take ownership due to the lifetime constraints of the FromStr trait.
/// Prefer `Did::new()` or `Did::raw` if you want to borrow.
fn from_str(s: &str) -> Result<Self, Self::Err> {
-
Self::from_cowstr(CowStr::Borrowed(s).into_static())
+
Self::new(s)
}
}
-
impl<'de> Deserialize<'de> for Tid<'de> {
+
impl<'de> Deserialize<'de> for Tid {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
-
let value = Deserialize::deserialize(deserializer)?;
+
let value: &str = Deserialize::deserialize(deserializer)?;
Self::new(value).map_err(D::Error::custom)
}
}
-
impl fmt::Display for Tid<'_> {
+
impl fmt::Display for Tid {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
-
impl<'t> From<Tid<'t>> for String {
-
fn from(value: Tid<'t>) -> Self {
+
impl From<Tid> for String {
+
fn from(value: Tid) -> Self {
value.0.to_string()
}
}
-
impl<'t> From<Tid<'t>> for CowStr<'t> {
-
fn from(value: Tid<'t>) -> Self {
+
impl From<Tid> for SmolStr {
+
fn from(value: Tid) -> Self {
value.0
}
}
-
impl From<String> for Tid<'static> {
+
impl From<String> for Tid {
fn from(value: String) -> Self {
if value.len() != 13 {
panic!("TID must be 13 characters")
} else if !TID_REGEX.is_match(&value) {
panic!("Invalid TID")
} else {
-
Self(CowStr::Owned(value.to_compact_string()))
+
Self(SmolStr::new_inline(&value))
}
}
}
-
impl<'t> From<CowStr<'t>> for Tid<'t> {
+
impl<'t> From<CowStr<'t>> for Tid {
fn from(value: CowStr<'t>) -> Self {
if value.len() != 13 {
panic!("TID must be 13 characters")
} else if !TID_REGEX.is_match(&value) {
panic!("Invalid TID")
} else {
-
Self(value)
+
Self(SmolStr::new_inline(&value))
}
}
}
-
impl AsRef<str> for Tid<'_> {
+
impl AsRef<str> for Tid {
fn as_ref(&self) -> &str {
self.as_str()
}
}
-
impl Deref for Tid<'_> {
+
impl Deref for Tid {
type Target = str;
fn deref(&self) -> &Self::Target {