A better Rust ATProto crate
at lifetimes 11 kB view raw
1use crate::corpus::LexiconCorpus; 2use crate::lexicon::{ 3 LexArrayItem, LexObjectProperty, LexUserType, LexXrpcBodySchema, 4 LexXrpcSubscriptionMessageSchema, 5}; 6use jacquard_common::smol_str::{SmolStr, ToSmolStr}; 7use jacquard_common::{CowStr, smol_str}; 8use std::collections::{BTreeMap, BTreeSet}; 9 10/// Information about a single union type found in the corpus 11#[derive(Debug, Clone)] 12pub struct UnionInfo { 13 /// NSID of the lexicon containing this union 14 pub lexicon_nsid: SmolStr, 15 /// Name of the def containing this union (e.g., "main", "replyRef") 16 pub def_name: SmolStr, 17 /// Field path within the def (e.g., "embed", "properties.embed") 18 pub field_path: CowStr<'static>, 19 /// Refs that exist in the corpus 20 pub known_refs: Vec<CowStr<'static>>, 21 /// Refs that don't exist in the corpus 22 pub unknown_refs: Vec<CowStr<'static>>, 23 /// Whether the union is closed (default true if not specified) 24 pub closed: bool, 25} 26 27impl UnionInfo { 28 /// Get the source text for this union's lexicon from the corpus 29 pub fn get_source<'c>(&self, corpus: &'c LexiconCorpus) -> Option<&'c str> { 30 corpus.get_source(&self.lexicon_nsid) 31 } 32 33 /// Check if this union has any unknown refs 34 pub fn has_unknown_refs(&self) -> bool { 35 !self.unknown_refs.is_empty() 36 } 37 38 /// Get all refs (known + unknown) 39 pub fn all_refs(&self) -> impl Iterator<Item = &CowStr<'static>> { 40 self.known_refs.iter().chain(self.unknown_refs.iter()) 41 } 42} 43 44/// Registry of all union types found in the corpus 45#[derive(Debug, Clone)] 46pub struct UnionRegistry { 47 /// Map from union identifier to union info 48 /// Key is "{lexicon_nsid}#{def_name}:{field_path}" 49 unions: BTreeMap<SmolStr, UnionInfo>, 50} 51 52impl UnionRegistry { 53 /// Create a new empty union registry 54 pub fn new() -> Self { 55 Self { 56 unions: BTreeMap::new(), 57 } 58 } 59 60 /// Build a union registry from a corpus 61 pub fn from_corpus(corpus: &LexiconCorpus) -> Self { 62 let mut registry = Self::new(); 63 64 for (nsid, doc) in corpus.iter() { 65 for (def_name, def) in &doc.defs { 66 registry.collect_unions_from_def(corpus, nsid, def_name, def); 67 } 68 } 69 70 registry 71 } 72 73 /// Collect unions from a single def 74 fn collect_unions_from_def( 75 &mut self, 76 corpus: &LexiconCorpus, 77 nsid: &SmolStr, 78 def_name: &SmolStr, 79 def: &LexUserType<'static>, 80 ) { 81 match def { 82 LexUserType::Record(record) => match &record.record { 83 crate::lexicon::LexRecordRecord::Object(obj) => { 84 self.collect_unions_from_object(corpus, nsid, def_name, "", obj); 85 } 86 }, 87 LexUserType::Object(obj) => { 88 self.collect_unions_from_object(corpus, nsid, def_name, "", obj); 89 } 90 LexUserType::XrpcQuery(query) => { 91 if let Some(output) = &query.output { 92 if let Some(schema) = &output.schema { 93 self.collect_unions_from_xrpc_body_schema( 94 corpus, nsid, def_name, "output", schema, 95 ); 96 } 97 } 98 } 99 LexUserType::XrpcProcedure(proc) => { 100 if let Some(input) = &proc.input { 101 if let Some(schema) = &input.schema { 102 self.collect_unions_from_xrpc_body_schema( 103 corpus, nsid, def_name, "input", schema, 104 ); 105 } 106 } 107 if let Some(output) = &proc.output { 108 if let Some(schema) = &output.schema { 109 self.collect_unions_from_xrpc_body_schema( 110 corpus, nsid, def_name, "output", schema, 111 ); 112 } 113 } 114 } 115 LexUserType::XrpcSubscription(sub) => { 116 if let Some(message) = &sub.message { 117 if let Some(schema) = &message.schema { 118 self.collect_unions_from_subscription_message_schema( 119 corpus, nsid, def_name, "message", schema, 120 ); 121 } 122 } 123 } 124 _ => {} 125 } 126 } 127 128 /// Collect unions from an object's properties 129 fn collect_unions_from_object( 130 &mut self, 131 corpus: &LexiconCorpus, 132 nsid: &SmolStr, 133 def_name: &SmolStr, 134 path_prefix: &str, 135 obj: &crate::lexicon::LexObject<'static>, 136 ) { 137 for (prop_name, prop) in &obj.properties { 138 let prop_path = if path_prefix.is_empty() { 139 prop_name.to_smolstr() 140 } else { 141 smol_str::format_smolstr!("{}.{}", path_prefix, prop_name) 142 }; 143 144 match prop { 145 LexObjectProperty::Union(union) => { 146 self.register_union( 147 corpus, 148 nsid, 149 def_name, 150 &prop_path, 151 &union.refs, 152 union.closed, 153 ); 154 } 155 LexObjectProperty::Array(array) => { 156 if let LexArrayItem::Union(union) = &array.items { 157 let array_path = format!("{}[]", prop_path); 158 self.register_union( 159 corpus, 160 nsid, 161 def_name, 162 &array_path, 163 &union.refs, 164 union.closed, 165 ); 166 } 167 } 168 LexObjectProperty::Ref(ref_type) => { 169 // Check if ref points to a union 170 if let Some((_, ref_def)) = corpus.resolve_ref(ref_type.r#ref.as_ref()) { 171 if matches!(ref_def, LexUserType::Object(_)) { 172 // Recursively check the referenced object 173 // (we'll handle this in a future iteration if needed) 174 } 175 } 176 } 177 _ => {} 178 } 179 } 180 } 181 182 /// Collect unions from XRPC body schema 183 fn collect_unions_from_xrpc_body_schema( 184 &mut self, 185 corpus: &LexiconCorpus, 186 nsid: &SmolStr, 187 def_name: &SmolStr, 188 path: &str, 189 schema: &LexXrpcBodySchema<'static>, 190 ) { 191 match schema { 192 LexXrpcBodySchema::Union(union) => { 193 self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed); 194 } 195 LexXrpcBodySchema::Object(obj) => { 196 self.collect_unions_from_object(corpus, nsid, def_name, path, obj); 197 } 198 _ => {} 199 } 200 } 201 202 /// Collect unions from subscription message schema 203 fn collect_unions_from_subscription_message_schema( 204 &mut self, 205 corpus: &LexiconCorpus, 206 nsid: &SmolStr, 207 def_name: &SmolStr, 208 path: &str, 209 schema: &LexXrpcSubscriptionMessageSchema<'static>, 210 ) { 211 match schema { 212 LexXrpcSubscriptionMessageSchema::Union(union) => { 213 self.register_union(corpus, nsid, def_name, path, &union.refs, union.closed); 214 } 215 LexXrpcSubscriptionMessageSchema::Object(obj) => { 216 self.collect_unions_from_object(corpus, nsid, def_name, path, obj); 217 } 218 _ => {} 219 } 220 } 221 222 /// Register a union with the registry 223 fn register_union( 224 &mut self, 225 corpus: &LexiconCorpus, 226 nsid: &SmolStr, 227 def_name: &SmolStr, 228 field_path: &str, 229 refs: &[jacquard_common::CowStr<'static>], 230 closed: Option<bool>, 231 ) { 232 let mut known_refs = Vec::new(); 233 let mut unknown_refs = Vec::new(); 234 235 for ref_str in refs { 236 if corpus.ref_exists(&ref_str) { 237 known_refs.push(ref_str.clone()); 238 } else { 239 unknown_refs.push(ref_str.clone()); 240 } 241 } 242 243 let key = smol_str::format_smolstr!("{}#{}:{}", nsid, def_name, field_path); 244 self.unions.insert( 245 key, 246 UnionInfo { 247 lexicon_nsid: nsid.clone(), 248 def_name: def_name.clone(), 249 field_path: CowStr::Owned(field_path.to_smolstr()), 250 known_refs, 251 unknown_refs, 252 closed: closed.unwrap_or(true), 253 }, 254 ); 255 } 256 257 /// Get all unions 258 pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &UnionInfo)> { 259 self.unions.iter() 260 } 261 262 /// Get a specific union 263 pub fn get(&self, key: &str) -> Option<&UnionInfo> { 264 self.unions.get(key) 265 } 266 267 /// Number of unions in registry 268 pub fn len(&self) -> usize { 269 self.unions.len() 270 } 271 272 /// Check if registry is empty 273 pub fn is_empty(&self) -> bool { 274 self.unions.is_empty() 275 } 276 277 /// Get all unique refs across all unions 278 pub fn all_refs(&self) -> BTreeSet<CowStr<'static>> { 279 let mut refs = BTreeSet::new(); 280 for union in self.unions.values() { 281 refs.extend(union.known_refs.iter().cloned()); 282 refs.extend(union.unknown_refs.iter().cloned()); 283 } 284 refs 285 } 286} 287 288impl Default for UnionRegistry { 289 fn default() -> Self { 290 Self::new() 291 } 292} 293 294#[cfg(test)] 295mod tests { 296 use super::*; 297 298 #[test] 299 fn test_union_registry_from_corpus() { 300 let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 301 .expect("failed to load lexicons"); 302 303 let registry = UnionRegistry::from_corpus(&corpus); 304 305 assert!(!registry.is_empty()); 306 307 // Check that we found the embed union in post 308 let post_embed = registry 309 .iter() 310 .find(|(_, info)| { 311 info.lexicon_nsid == "app.bsky.feed.post" 312 && info.def_name == "main" 313 && info.field_path.contains("embed") 314 }) 315 .expect("should find post embed union"); 316 317 let info = post_embed.1; 318 assert!(info.known_refs.contains(&"app.bsky.embed.images".into())); 319 assert!(info.known_refs.contains(&"app.bsky.embed.video".into())); 320 assert!(info.known_refs.contains(&"app.bsky.embed.external".into())); 321 } 322 323 #[test] 324 fn test_union_registry_tracks_unknown_refs() { 325 let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 326 .expect("failed to load lexicons"); 327 328 let registry = UnionRegistry::from_corpus(&corpus); 329 330 // If there are any unknown refs, they should be tracked 331 for (_, info) in registry.iter() { 332 for unknown in &info.unknown_refs { 333 assert!(!corpus.ref_exists(unknown)); 334 } 335 } 336 } 337}