A better Rust ATProto crate
at main 13 kB view raw
1use crate::error::{CodegenError, Result}; 2use proc_macro2::TokenStream; 3use quote::quote; 4use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; 5 6use super::utils::{make_ident, sanitize_name}; 7use super::CodeGenerator; 8 9impl<'c> CodeGenerator<'c> { 10 /// Generate all code for the corpus, organized by file 11 /// Returns a map of file paths to (tokens, optional NSID) 12 pub fn generate_all( 13 &self, 14 ) -> Result<BTreeMap<std::path::PathBuf, (TokenStream, Option<String>)>> { 15 let mut file_contents: BTreeMap<std::path::PathBuf, Vec<TokenStream>> = BTreeMap::new(); 16 let mut file_nsids: BTreeMap<std::path::PathBuf, String> = BTreeMap::new(); 17 18 // Generate code for all lexicons 19 for (nsid, doc) in self.corpus.iter() { 20 let file_path = self.nsid_to_file_path(nsid.as_ref()); 21 22 // Track which NSID this file is for 23 file_nsids.insert(file_path.clone(), nsid.to_string()); 24 25 for (def_name, def) in &doc.defs { 26 let tokens = self.generate_def(nsid.as_ref(), def_name.as_ref(), def)?; 27 file_contents 28 .entry(file_path.clone()) 29 .or_default() 30 .push(tokens); 31 } 32 } 33 34 // Combine all tokens for each file 35 let mut result = BTreeMap::new(); 36 for (path, tokens_vec) in file_contents { 37 let nsid = file_nsids.get(&path).cloned(); 38 result.insert(path, (quote! { #(#tokens_vec)* }, nsid)); 39 } 40 41 Ok(result) 42 } 43 44 /// Generate parent module files with pub mod declarations 45 pub fn generate_module_tree( 46 &self, 47 file_map: &BTreeMap<std::path::PathBuf, (TokenStream, Option<String>)>, 48 defs_only: &BTreeMap<std::path::PathBuf, (TokenStream, Option<String>)>, 49 ) -> BTreeMap<std::path::PathBuf, (TokenStream, Option<String>)> { 50 // Track what modules each directory needs to declare 51 // Key: directory path, Value: set of module names (file stems) 52 let mut dir_modules: BTreeMap<std::path::PathBuf, BTreeSet<String>> = BTreeMap::new(); 53 54 // Collect all parent directories that have files 55 let mut all_dirs: BTreeSet<std::path::PathBuf> = BTreeSet::new(); 56 for path in file_map.keys() { 57 if let Some(parent_dir) = path.parent() { 58 all_dirs.insert(parent_dir.to_path_buf()); 59 } 60 } 61 62 for path in file_map.keys() { 63 if let Some(parent_dir) = path.parent() { 64 if let Some(file_stem) = path.file_stem().and_then(|s| s.to_str()) { 65 // Skip mod.rs and lib.rs - they're module files, not modules to declare 66 if file_stem == "mod" || file_stem == "lib" { 67 continue; 68 } 69 70 // Always add the module declaration to parent 71 dir_modules 72 .entry(parent_dir.to_path_buf()) 73 .or_default() 74 .insert(file_stem.to_string()); 75 } 76 } 77 } 78 79 // Generate module files 80 let mut result = BTreeMap::new(); 81 82 for (dir, module_names) in dir_modules { 83 let mod_file_path = if dir.components().count() == 0 { 84 // Root directory -> lib.rs for library crates 85 std::path::PathBuf::from("lib.rs") 86 } else { 87 // Subdirectory: app_bsky/feed -> app_bsky/feed.rs (Rust 2018 style) 88 let dir_name = dir.file_name().and_then(|s| s.to_str()).unwrap_or("mod"); 89 let sanitized_dir_name = sanitize_name(dir_name); 90 let mut path = dir 91 .parent() 92 .unwrap_or_else(|| std::path::Path::new("")) 93 .to_path_buf(); 94 path.push(format!("{}.rs", sanitized_dir_name)); 95 path 96 }; 97 98 let is_root = dir.components().count() == 0; 99 let mods: Vec<_> = module_names 100 .iter() 101 .map(|name| { 102 let ident = make_ident(name); 103 if is_root { 104 // Top-level modules get feature gates 105 quote! { 106 #[cfg(feature = #name)] 107 pub mod #ident; 108 } 109 } else { 110 quote! { pub mod #ident; } 111 } 112 }) 113 .collect(); 114 115 // If this file already exists in defs_only (e.g., from defs), merge the content 116 let module_tokens = quote! { #(#mods)* }; 117 if let Some((existing_tokens, nsid)) = defs_only.get(&mod_file_path) { 118 // Put module declarations FIRST, then existing defs content 119 result.insert( 120 mod_file_path, 121 (quote! { #module_tokens #existing_tokens }, nsid.clone()), 122 ); 123 } else { 124 result.insert(mod_file_path, (module_tokens, None)); 125 } 126 } 127 128 result 129 } 130 131 /// Write all generated code to disk 132 pub fn write_to_disk(&self, output_dir: &std::path::Path) -> Result<()> { 133 // Generate all code (defs only) 134 let defs_files = self.generate_all()?; 135 let mut all_files = defs_files.clone(); 136 137 // Generate module tree iteratively until no new files appear 138 loop { 139 let module_map = self.generate_module_tree(&all_files, &defs_files); 140 let old_count = all_files.len(); 141 142 // Merge new module files 143 for (path, tokens) in module_map { 144 all_files.insert(path, tokens); 145 } 146 147 if all_files.len() == old_count { 148 // No new files added 149 break; 150 } 151 } 152 153 // Write to disk 154 for (path, (tokens, nsid)) in all_files { 155 let full_path = output_dir.join(&path); 156 157 // Create parent directories 158 if let Some(parent) = full_path.parent() { 159 std::fs::create_dir_all(parent).map_err(|e| CodegenError::Other { 160 message: format!("Failed to create directory {:?}: {}", parent, e), 161 source: None, 162 })?; 163 } 164 165 // Format code 166 let file: syn::File = syn::parse2(tokens.clone()).map_err(|e| CodegenError::Other { 167 message: format!( 168 "Failed to parse tokens for {:?}: {}\nTokens: {}", 169 path, e, tokens 170 ), 171 source: None, 172 })?; 173 let mut formatted = prettyplease::unparse(&file); 174 175 // Add blank lines between top-level items for better readability 176 let lines: Vec<&str> = formatted.lines().collect(); 177 let mut result_lines = Vec::new(); 178 179 for (i, line) in lines.iter().enumerate() { 180 result_lines.push(*line); 181 182 // Add blank line after closing braces that are at column 0 (top-level items) 183 if *line == "}" && i + 1 < lines.len() && !lines[i + 1].is_empty() { 184 result_lines.push(""); 185 } 186 187 // Add blank line after last pub mod declaration before structs/enums 188 if line.starts_with("pub mod ") && i + 1 < lines.len() { 189 let next_line = lines[i + 1]; 190 if !next_line.starts_with("pub mod ") && !next_line.is_empty() { 191 result_lines.push(""); 192 } 193 } 194 } 195 196 formatted = result_lines.join("\n"); 197 198 // Add header comment 199 let header = if let Some(nsid) = nsid { 200 format!( 201 "// @generated by jacquard-lexicon. DO NOT EDIT.\n//\n// Lexicon: {}\n//\n// This file was automatically generated from Lexicon schemas.\n// Any manual changes will be overwritten on the next regeneration.\n\n", 202 nsid 203 ) 204 } else { 205 "// @generated by jacquard-lexicon. DO NOT EDIT.\n//\n// This file was automatically generated from Lexicon schemas.\n// Any manual changes will be overwritten on the next regeneration.\n\n".to_string() 206 }; 207 formatted = format!("{}{}", header, formatted); 208 209 // Write file 210 std::fs::write(&full_path, formatted).map_err(|e| CodegenError::Other { 211 message: format!("Failed to write file {:?}: {}", full_path, e), 212 source: None, 213 })?; 214 } 215 216 Ok(()) 217 } 218 219 /// Get namespace dependencies collected during code generation 220 pub fn get_namespace_dependencies( 221 &self, 222 ) -> HashMap<String, HashSet<String>> { 223 self.namespace_deps.borrow().clone() 224 } 225 226 /// Generate Cargo.toml features section from namespace dependencies 227 pub fn generate_cargo_features(&self, lib_rs_path: Option<&std::path::Path>) -> String { 228 use std::fmt::Write; 229 230 let deps = self.namespace_deps.borrow(); 231 let mut all_namespaces: HashSet<String> = 232 HashSet::new(); 233 234 // Collect all namespaces from the corpus (first two segments of each NSID) 235 for (nsid, _doc) in self.corpus.iter() { 236 let parts: Vec<_> = nsid.as_str().splitn(3, '.').collect(); 237 let namespace = if parts.len() >= 2 { 238 format!("{}.{}", parts[0], parts[1]) 239 } else { 240 nsid.to_string() 241 }; 242 all_namespaces.insert(namespace); 243 } 244 245 // Also collect existing feature names from lib.rs 246 let mut existing_features = HashSet::new(); 247 if let Some(lib_rs) = lib_rs_path { 248 if let Ok(content) = std::fs::read_to_string(lib_rs) { 249 for line in content.lines() { 250 if let Some(feature) = line 251 .trim() 252 .strip_prefix("#[cfg(feature = \"") 253 .and_then(|s| s.strip_suffix("\")]")) 254 { 255 existing_features.insert(feature.to_string()); 256 } 257 } 258 } 259 } 260 261 let mut output = String::new(); 262 writeln!(&mut output, "# Generated namespace features").unwrap(); 263 264 // Convert namespace to feature name (matching module path sanitization) 265 let to_feature_name = |ns: &str| { 266 ns.split('.') 267 .map(|segment| { 268 // Apply same sanitization as module names 269 let mut result = segment.replace('-', "_"); 270 // Prefix with underscore if starts with digit 271 if result.chars().next().map_or(false, |c| c.is_ascii_digit()) { 272 result.insert(0, '_'); 273 } 274 result 275 }) 276 .collect::<Vec<_>>() 277 .join("_") 278 }; 279 280 // Collect all feature names (from corpus + existing lib.rs) 281 let mut all_feature_names = HashSet::new(); 282 for ns in &all_namespaces { 283 all_feature_names.insert(to_feature_name(ns)); 284 } 285 all_feature_names.extend(existing_features); 286 287 // Sort for consistent output 288 let mut feature_names: Vec<_> = all_feature_names.iter().collect(); 289 feature_names.sort(); 290 291 // Map namespace to feature name for dependency lookup 292 let mut ns_to_feature: HashMap<&str, String> = 293 HashMap::new(); 294 for ns in &all_namespaces { 295 ns_to_feature.insert(ns.as_str(), to_feature_name(ns)); 296 } 297 298 for feature_name in feature_names { 299 // Find corresponding namespace for this feature (if any) to look up deps 300 let feature_deps: Vec<String> = all_namespaces 301 .iter() 302 .find(|ns| to_feature_name(ns) == *feature_name) 303 .and_then(|ns| deps.get(ns.as_str())) 304 .map(|ns_deps| { 305 let mut dep_features: Vec<_> = ns_deps 306 .iter() 307 .map(|d| format!("\"{}\"", to_feature_name(d))) 308 .collect(); 309 dep_features.sort(); 310 dep_features 311 }) 312 .unwrap_or_default(); 313 314 if !feature_deps.is_empty() { 315 writeln!( 316 &mut output, 317 "{} = [{}]", 318 feature_name, 319 feature_deps.join(", ") 320 ) 321 .unwrap(); 322 } else { 323 writeln!(&mut output, "{} = []", feature_name).unwrap(); 324 } 325 } 326 327 output 328 } 329}