1use crate::error::Result;
2use crate::lexicon::{LexUserType, LexiconDoc};
3use jacquard_common::{into_static::IntoStatic, smol_str::SmolStr};
4use std::collections::BTreeMap;
5use std::fs;
6use std::path::Path;
7
8/// Registry of all loaded lexicons for reference resolution
9#[derive(Debug, Clone)]
10pub struct LexiconCorpus {
11 /// Map from NSID to lexicon document
12 docs: BTreeMap<SmolStr, LexiconDoc<'static>>,
13 /// Map from NSID to original source text (for error reporting)
14 sources: BTreeMap<SmolStr, String>,
15}
16
17impl LexiconCorpus {
18 /// Create an empty corpus
19 pub fn new() -> Self {
20 Self {
21 docs: BTreeMap::new(),
22 sources: BTreeMap::new(),
23 }
24 }
25
26 /// Load all lexicons from a directory
27 pub fn load_from_dir(path: impl AsRef<Path>) -> Result<Self> {
28 let mut corpus = Self::new();
29
30 let schemas = crate::fs::find_schemas(path.as_ref())?;
31 for schema_path in schemas {
32 let content = fs::read_to_string(schema_path.as_ref())?;
33
34 // Try to parse as lexicon doc - skip files that aren't lexicon schemas
35 let doc: LexiconDoc = match serde_json::from_str(&content) {
36 Ok(doc) => doc,
37 Err(_) => continue, // Skip non-lexicon JSON files
38 };
39
40 let nsid = SmolStr::from(doc.id.to_string());
41 corpus.docs.insert(nsid.clone(), doc.into_static());
42 corpus.sources.insert(nsid, content);
43 }
44
45 Ok(corpus)
46 }
47
48 /// Get a lexicon document by NSID
49 pub fn get(&self, nsid: &str) -> Option<&LexiconDoc<'static>> {
50 self.docs.get(nsid)
51 }
52
53 /// Get the source text for a lexicon by NSID
54 pub fn get_source(&self, nsid: &str) -> Option<&str> {
55 self.sources.get(nsid).map(|s| s.as_str())
56 }
57
58 /// Resolve a reference, handling fragments
59 ///
60 /// Examples:
61 /// - `app.bsky.feed.post` → main def from that lexicon
62 /// - `app.bsky.feed.post#replyRef` → replyRef def from that lexicon
63 pub fn resolve_ref(
64 &self,
65 ref_str: &str,
66 ) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> {
67 let (nsid, def_name) = if let Some((nsid, fragment)) = ref_str.split_once('#') {
68 (nsid, fragment)
69 } else {
70 (ref_str, "main")
71 };
72
73 let doc = self.get(nsid)?;
74 let def = doc.defs.get(def_name)?;
75 Some((doc, def))
76 }
77
78 /// Check if a reference exists
79 pub fn ref_exists(&self, ref_str: &str) -> bool {
80 self.resolve_ref(ref_str).is_some()
81 }
82
83 /// Iterate over all documents
84 pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &LexiconDoc<'static>)> {
85 self.docs.iter()
86 }
87
88 /// Number of loaded lexicons
89 pub fn len(&self) -> usize {
90 self.docs.len()
91 }
92
93 /// Check if corpus is empty
94 pub fn is_empty(&self) -> bool {
95 self.docs.is_empty()
96 }
97}
98
99impl Default for LexiconCorpus {
100 fn default() -> Self {
101 Self::new()
102 }
103}
104
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexicon::LexUserType;

    /// A freshly constructed corpus reports itself as empty.
    #[test]
    fn test_empty_corpus() {
        let corpus = LexiconCorpus::new();
        assert_eq!(corpus.len(), 0);
        assert!(corpus.is_empty());
    }

    /// Loading the fixture directory yields the expected set of lexicons.
    // NOTE(review): this test reads `tests/fixtures/test_lexicons`, while the
    // other tests in this module read `tests/fixtures/lexicons` — confirm
    // that both directories exist and that the difference is intentional.
    #[test]
    fn test_load_lexicons() {
        let loaded = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons");
        let corpus = loaded.expect("failed to load lexicons");

        assert_eq!(corpus.len(), 10);
        assert!(!corpus.is_empty());

        // Spot-check a handful of the lexicons that should be present.
        assert!(corpus.get("app.bsky.feed.post").is_some());
        assert!(corpus.get("app.bsky.feed.getAuthorFeed").is_some());
        assert!(corpus.get("app.bsky.richtext.facet").is_some());
        assert!(corpus.get("app.bsky.embed.images").is_some());
        assert!(corpus.get("com.atproto.repo.strongRef").is_some());
        assert!(corpus.get("com.atproto.label.defs").is_some());
    }

    /// A reference with no `#fragment` resolves to the document's `main` def.
    #[test]
    fn test_resolve_ref_without_fragment() {
        let loaded = LexiconCorpus::load_from_dir("tests/fixtures/lexicons");
        let corpus = loaded.expect("failed to load lexicons");

        let resolved = corpus.resolve_ref("app.bsky.feed.post");
        let (doc, def) = resolved.expect("should resolve");

        assert!(matches!(def, LexUserType::Record(_)));
        assert_eq!(doc.id.as_ref(), "app.bsky.feed.post");
    }

    /// A reference with a `#fragment` resolves to that named def.
    #[test]
    fn test_resolve_ref_with_fragment() {
        let loaded = LexiconCorpus::load_from_dir("tests/fixtures/lexicons");
        let corpus = loaded.expect("failed to load lexicons");

        let resolved = corpus.resolve_ref("app.bsky.richtext.facet#mention");
        let (doc, def) = resolved.expect("should resolve");

        assert!(matches!(def, LexUserType::Object(_)));
        assert_eq!(doc.id.as_ref(), "app.bsky.richtext.facet");
    }

    /// `ref_exists` is true for known refs and false for unknown ones.
    #[test]
    fn test_ref_exists() {
        let loaded = LexiconCorpus::load_from_dir("tests/fixtures/lexicons");
        let corpus = loaded.expect("failed to load lexicons");

        // Refs that should resolve
        assert!(corpus.ref_exists("app.bsky.feed.post"));
        assert!(corpus.ref_exists("app.bsky.feed.post#main"));
        assert!(corpus.ref_exists("app.bsky.richtext.facet#mention"));

        // Refs that should not resolve: unknown NSID, then unknown fragment
        assert!(!corpus.ref_exists("com.example.fake"));
        assert!(!corpus.ref_exists("app.bsky.feed.post#nonexistent"));
    }
}