Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
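The CLI source below defines three subcommands: deploy (the default), pull, and serve. A minimal usage sketch, assuming the binary is installed as `wisp` (the actual binary name may differ); the flags match the clap definitions in the source:

    # Deploy a directory as a site (OAuth by default; use --password for an app password)
    wisp deploy alice.bsky.social --path ./dist --site my-site
    # Pull a deployed site into a local directory
    wisp pull alice.bsky.social --site my-site --output ./my-site
    # Serve a site locally with real-time firehose updates
    wisp serve alice.bsky.social --site my-site --output ./my-site --port 8080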
mod builder_types;
mod place_wisp;
mod cid;
mod blob_map;
mod metadata;
mod download;
mod pull;
mod serve;
mod subfs_utils;
mod redirects;

use clap::{Parser, Subcommand};
use jacquard::CowStr;
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
use jacquard::oauth::client::OAuthClient;
use jacquard::oauth::loopback::LoopbackConfig;
use jacquard::prelude::IdentityResolver;
use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
use jacquard_common::types::blob::MimeType;
use miette::IntoDiagnostic;
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use flate2::Compression;
use flate2::write::GzEncoder;
use std::io::Write;
use base64::Engine;
use futures::stream::{self, StreamExt};

use place_wisp::fs::*;
use place_wisp::settings::*;

#[derive(Parser, Debug)]
#[command(author, version, about = "wisp.place CLI tool")]
struct Args {
    #[command(subcommand)]
    command: Option<Commands>,

    // Deploy arguments (when no subcommand is specified)
    /// Handle (e.g., alice.bsky.social), DID, or PDS URL
    #[arg(global = true, conflicts_with = "command")]
    input: Option<CowStr<'static>>,

    /// Path to the directory containing your static site
    #[arg(short, long, global = true, conflicts_with = "command")]
    path: Option<PathBuf>,

    /// Site name (defaults to directory name)
    #[arg(short, long, global = true, conflicts_with = "command")]
    site: Option<String>,

    /// Path to auth store file
    #[arg(long, global = true, conflicts_with = "command")]
    store: Option<String>,

    /// App Password for authentication
    #[arg(long, global = true, conflicts_with = "command")]
    password: Option<CowStr<'static>>,

    /// Enable directory listing mode for paths without index files
    #[arg(long, global = true, conflicts_with = "command")]
    directory: bool,

    /// Enable SPA mode (serve index.html for all routes)
    #[arg(long, global = true, conflicts_with = "command")]
    spa: bool,
}

#[derive(Subcommand, Debug)]
enum Commands {
    /// Deploy a static site to wisp.place (default command)
    Deploy {
        /// Handle (e.g., alice.bsky.social), DID, or PDS URL
        input: CowStr<'static>,

        /// Path to the directory containing your static site
        #[arg(short, long, default_value = ".")]
        path: PathBuf,

        /// Site name (defaults to directory name)
        #[arg(short, long)]
        site: Option<String>,

        /// Path to auth store file (will be created if missing, only used with OAuth)
        #[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
        store: String,

        /// App Password for authentication (alternative to OAuth)
        #[arg(long)]
        password: Option<CowStr<'static>>,

        /// Enable directory listing mode for paths without index files
        #[arg(long)]
        directory: bool,

        /// Enable SPA mode (serve index.html for all routes)
        #[arg(long)]
        spa: bool,
    },
    /// Pull a site from the PDS to a local directory
    Pull {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the downloaded site
        #[arg(short, long, default_value = ".")]
        output: PathBuf,
    },
    /// Serve a site locally with real-time firehose updates
    Serve {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the site files
        #[arg(short, long, default_value = ".")]
        output: PathBuf,

        /// Port to serve on
        #[arg(short, long, default_value = "8080")]
        port: u16,
    },
}

#[tokio::main]
async fn main() -> miette::Result<()> {
    let args = Args::parse();

    let result = match args.command {
        Some(Commands::Deploy { input, path, site, store, password, directory, spa }) => {
            // Dispatch to appropriate authentication method
            if let Some(password) = password {
                run_with_app_password(input, password, path, site, directory, spa).await
            } else {
                run_with_oauth(input, store, path, site, directory, spa).await
            }
        }
        Some(Commands::Pull { input, site, output }) => {
            pull::pull_site(input, CowStr::from(site), output).await
        }
        Some(Commands::Serve { input, site, output, port }) => {
            serve::serve_site(input, CowStr::from(site), output, port).await
        }
        None => {
            // Legacy mode: if input is provided, assume deploy command
            if let Some(input) = args.input {
                let path = args.path.unwrap_or_else(|| PathBuf::from("."));
                let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string());

                // Dispatch to appropriate authentication method
                if let Some(password) = args.password {
                    run_with_app_password(input, password, path, args.site, args.directory, args.spa).await
                } else {
                    run_with_oauth(input, store, path, args.site, args.directory, args.spa).await
                }
            } else {
                // No command and no input, show help
                use clap::CommandFactory;
                Args::command().print_help().into_diagnostic()?;
                Ok(())
            }
        }
    };

    // Force exit to avoid hanging on background tasks/connections
    match result {
        Ok(_) => std::process::exit(0),
        Err(e) => {
            eprintln!("{:?}", e);
            std::process::exit(1)
        }
    }
}

/// Run deployment with app password authentication
async fn run_with_app_password(
    input: CowStr<'static>,
    password: CowStr<'static>,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    let (session, auth) =
        MemoryCredentialSession::authenticated(input, password, None, None).await?;
    println!("Signed in as {}", auth.handle);

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}

/// Run deployment with OAuth authentication
async fn run_with_oauth(
    input: CowStr<'static>,
    store: String,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    use jacquard::oauth::scopes::Scope;
    use jacquard::oauth::atproto::AtprotoClientMetadata;
    use jacquard::oauth::session::ClientData;
    use url::Url;

    // Request the necessary scopes for wisp.place (including settings)
    let scopes = Scope::parse_multiple("atproto repo:place.wisp.fs repo:place.wisp.subfs repo:place.wisp.settings blob:*/*")
        .map_err(|e| miette::miette!("Failed to parse scopes: {:?}", e))?;

    // Create redirect URIs that match the loopback server (port 4000, path /oauth/callback)
    let redirect_uris = vec![
        Url::parse("http://127.0.0.1:4000/oauth/callback").into_diagnostic()?,
        Url::parse("http://[::1]:4000/oauth/callback").into_diagnostic()?,
    ];

    // Create client metadata with matching redirect URIs and scopes
    let client_data = ClientData {
        keyset: None,
        config: AtprotoClientMetadata::new_localhost(
            Some(redirect_uris),
            Some(scopes),
        ),
    };

    let oauth = OAuthClient::new(FileAuthStore::new(&store), client_data);

    let session = oauth
        .login_with_local_server(input, Default::default(), LoopbackConfig::default())
        .await?;

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}

/// Deploy the site using the provided agent
async fn deploy_site(
    agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
    path: PathBuf,
    site: Option<String>,
    directory_listing: bool,
    spa_mode: bool,
) -> miette::Result<()> {
    // Verify the path exists
    if !path.exists() {
        return Err(miette::miette!("Path does not exist: {}", path.display()));
    }

    // Get site name
    let site_name = site.unwrap_or_else(|| {
        path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("site")
            .to_string()
    });

    println!("Deploying site '{}'...", site_name);

    // Try to fetch existing manifest for incremental updates
    let (existing_blob_map, old_subfs_uris): (HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, Vec<(String, String)>) = {
        use jacquard_common::types::string::AtUri;

        // Get the DID for this session
        let session_info = agent.session_info().await;
        if let Some((did, _)) = session_info {
            // Construct the AT URI for the record
            let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name);
            if let Ok(uri) = AtUri::new(&uri_string) {
                match agent.get_record::<Fs>(&uri).await {
                    Ok(response) => {
                        match response.into_output() {
                            Ok(record_output) => {
                                let existing_manifest = record_output.value;
                                let mut blob_map = blob_map::extract_blob_map(&existing_manifest.root);
                                println!("Found existing manifest with {} files in main record", blob_map.len());

                                // Extract subfs URIs from main record
                                let subfs_uris = subfs_utils::extract_subfs_uris(&existing_manifest.root, String::new());

                                if !subfs_uris.is_empty() {
                                    println!("Found {} subfs records, fetching for blob reuse...", subfs_uris.len());

                                    // Merge blob maps from all subfs records
                                    match subfs_utils::merge_subfs_blob_maps(agent, subfs_uris.clone(), &mut blob_map).await {
                                        Ok(merged_count) => {
                                            println!("Total blob map: {} files (main + {} from subfs)", blob_map.len(), merged_count);
                                        }
                                        Err(e) => {
                                            eprintln!("⚠️ Failed to merge some subfs blob maps: {}", e);
                                        }
                                    }

                                    (blob_map, subfs_uris)
                                } else {
                                    (blob_map, Vec::new())
                                }
                            }
                            Err(_) => {
                                println!("No existing manifest found, uploading all files...");
                                (HashMap::new(), Vec::new())
                            }
                        }
                    }
                    Err(_) => {
                        // Record doesn't exist yet - this is a new site
                        println!("No existing manifest found, uploading all files...");
                        (HashMap::new(), Vec::new())
                    }
                }
            } else {
                println!("No existing manifest found (invalid URI), uploading all files...");
                (HashMap::new(), Vec::new())
            }
        } else {
            println!("No existing manifest found (could not get DID), uploading all files...");
            (HashMap::new(), Vec::new())
        }
    };

    // Build directory tree
    let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new()).await?;
    let uploaded_count = total_files - reused_count;

    // Check if we need to split into subfs records
    const MAX_MANIFEST_SIZE: usize = 140 * 1024; // 140KB (PDS limit is 150KB)
    const FILE_COUNT_THRESHOLD: usize = 250; // Start splitting at this many files
    const TARGET_FILE_COUNT: usize = 200; // Keep main manifest under this

    let mut working_directory = root_dir;
    let mut current_file_count = total_files;
    let mut new_subfs_uris: Vec<(String, String)> = Vec::new();

    // Estimate initial manifest size
    let mut manifest_size = subfs_utils::estimate_directory_size(&working_directory);

    if total_files >= FILE_COUNT_THRESHOLD || manifest_size > MAX_MANIFEST_SIZE {
        println!("\n⚠️ Large site detected ({} files, {:.1}KB manifest), splitting into subfs records...",
            total_files, manifest_size as f64 / 1024.0);

        let mut attempts = 0;
        const MAX_SPLIT_ATTEMPTS: usize = 50;

        while (manifest_size > MAX_MANIFEST_SIZE || current_file_count > TARGET_FILE_COUNT) && attempts < MAX_SPLIT_ATTEMPTS {
            attempts += 1;

            // Find large directories to split
            let directories = subfs_utils::find_large_directories(&working_directory, String::new());

            if let Some(largest_dir) = directories.first() {
                println!(" Split #{}: {} ({} files, {:.1}KB)",
                    attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0);

                // Check if this directory is itself too large for a single subfs record
                const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety
                let mut subfs_uri = String::new();

                if largest_dir.size > MAX_SUBFS_SIZE {
                    // Need to split this directory into multiple chunks
                    println!(" → Directory too large, splitting into chunks...");
                    let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE);
                    println!(" → Created {} chunks", chunks.len());

                    // Upload each chunk as a subfs record
                    let mut chunk_uris = Vec::new();
                    for (i, chunk) in chunks.iter().enumerate() {
                        use jacquard_common::types::string::Tid;
                        let chunk_tid = Tid::now_0();
                        let chunk_rkey = chunk_tid.to_string();

                        let chunk_file_count = subfs_utils::count_files_in_directory(chunk);
                        let chunk_size = subfs_utils::estimate_directory_size(chunk);

                        let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                            .root(convert_fs_dir_to_subfs_dir(chunk.clone()))
                            .file_count(Some(chunk_file_count as i64))
                            .created_at(Datetime::now())
                            .build();

                        println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...",
                            i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0);

                        let chunk_output = agent.put_record(
                            RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?),
                            chunk_manifest
                        ).await.into_diagnostic()?;

                        let chunk_uri = chunk_output.uri.to_string();
                        chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
                        new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
                    }

                    // Create a parent subfs record that references all chunks
                    // Each chunk reference MUST have flat: true to merge chunk contents
                    println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len());
                    use jacquard_common::CowStr;
                    use crate::place_wisp::fs::{Subfs};

                    // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs
                    let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| {
                        let uri_string = uri.clone();
                        let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI");
                        Entry::new()
                            .name(CowStr::from(format!("chunk{}", i)))
                            .node(EntryNode::Subfs(Box::new(
                                Subfs::new()
                                    .r#type(CowStr::from("subfs"))
                                    .subject(at_uri)
                                    .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents
                                    .build()
                            )))
                            .build()
                    }).collect();

                    let parent_root_fs = Directory::new()
                        .r#type(CowStr::from("directory"))
                        .entries(parent_entries_fs)
                        .build();

                    // Convert to subfs::Directory for the parent subfs record
                    let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs);

                    use jacquard_common::types::string::Tid;
                    let parent_tid = Tid::now_0();
                    let parent_rkey = parent_tid.to_string();

                    let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                        .root(parent_root_subfs)
                        .file_count(Some(largest_dir.file_count as i64))
                        .created_at(Datetime::now())
                        .build();

                    let parent_output = agent.put_record(
                        RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?),
                        parent_manifest
                    ).await.into_diagnostic()?;

                    subfs_uri = parent_output.uri.to_string();
                    println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri);
                } else {
                    // Directory fits in a single subfs record
                    use jacquard_common::types::string::Tid;
                    let subfs_tid = Tid::now_0();
                    let subfs_rkey = subfs_tid.to_string();

                    let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                        .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
                        .file_count(Some(largest_dir.file_count as i64))
                        .created_at(Datetime::now())
                        .build();

                    // Upload subfs record
                    let subfs_output = agent.put_record(
                        RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
                        subfs_manifest
                    ).await.into_diagnostic()?;

                    subfs_uri = subfs_output.uri.to_string();
                    println!(" ✅ Created subfs: {}", subfs_uri);
                }

                // Replace directory with subfs node (flat: false to preserve directory structure)
                working_directory = subfs_utils::replace_directory_with_subfs(
                    working_directory,
                    &largest_dir.path,
                    &subfs_uri,
                    false // Preserve directory - the chunks inside have flat=true
                )?;

                new_subfs_uris.push((subfs_uri, largest_dir.path.clone()));
                current_file_count -= largest_dir.file_count;

                // Recalculate manifest size
                manifest_size = subfs_utils::estimate_directory_size(&working_directory);
                println!(" → Manifest now {:.1}KB with {} files ({} subfs total)",
                    manifest_size as f64 / 1024.0, current_file_count, new_subfs_uris.len());

                if manifest_size <= MAX_MANIFEST_SIZE && current_file_count <= TARGET_FILE_COUNT {
                    println!("✅ Manifest now fits within limits");
                    break;
                }
            } else {
                println!(" No more subdirectories to split - stopping");
                break;
            }
        }

        if attempts >= MAX_SPLIT_ATTEMPTS {
            return Err(miette::miette!(
                "Exceeded maximum split attempts ({}). Manifest still too large: {:.1}KB with {} files",
                MAX_SPLIT_ATTEMPTS,
                manifest_size as f64 / 1024.0,
                current_file_count
            ));
        }

        println!("✅ Split complete: {} subfs records, {} files in main manifest, {:.1}KB",
            new_subfs_uris.len(), current_file_count, manifest_size as f64 / 1024.0);
    } else {
        println!("Manifest created ({} files, {:.1}KB) - no splitting needed",
            total_files, manifest_size as f64 / 1024.0);
    }

    // Create the final Fs record
    let fs_record = Fs::new()
        .site(CowStr::from(site_name.clone()))
        .root(working_directory)
        .file_count(current_file_count as i64)
        .created_at(Datetime::now())
        .build();

    // Use site name as the record key
    let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
    let output = agent.put_record(RecordKey::from(rkey), fs_record).await?;

    // Extract DID from the AT URI (format: at://did:plc:xxx/collection/rkey)
    let uri_str = output.uri.to_string();
    let did = uri_str
        .strip_prefix("at://")
        .and_then(|s| s.split('/').next())
        .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?;

    println!("\n✓ Deployed site '{}': {}", site_name, output.uri);
    println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count);
    println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name);

    // Clean up old subfs records
    if !old_subfs_uris.is_empty() {
        println!("\nCleaning up {} old subfs records...", old_subfs_uris.len());

        let mut deleted_count = 0;
        let mut failed_count = 0;

        for (uri, _path) in old_subfs_uris {
            match subfs_utils::delete_subfs_record(agent, &uri).await {
                Ok(_) => {
                    deleted_count += 1;
                    println!(" 🗑️ Deleted old subfs: {}", uri);
                }
                Err(e) => {
                    failed_count += 1;
                    eprintln!(" ⚠️ Failed to delete {}: {}", uri, e);
                }
            }
        }

        if failed_count > 0 {
            eprintln!("⚠️ Cleanup completed with {} deleted, {} failed", deleted_count, failed_count);
        } else {
            println!("✅ Cleanup complete: {} old subfs records deleted", deleted_count);
        }
    }

    // Upload settings if either flag is set
    if directory_listing || spa_mode {
        // Validate mutual exclusivity
        if directory_listing && spa_mode {
            return Err(miette::miette!("Cannot enable both --directory and --SPA modes"));
        }

        println!("\n⚙️ Uploading site settings...");

        // Build settings record
        let mut settings_builder = Settings::new();

        if directory_listing {
            settings_builder = settings_builder.directory_listing(Some(true));
            println!(" • Directory listing: enabled");
        }

        if spa_mode {
            settings_builder = settings_builder.spa_mode(Some(CowStr::from("index.html")));
            println!(" • SPA mode: enabled (serving index.html for all routes)");
        }

        let settings_record = settings_builder.build();

        // Upload settings record with same rkey as site
        let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
        match agent.put_record(RecordKey::from(rkey), settings_record).await {
            Ok(settings_output) => {
                println!("✅ Settings uploaded: {}", settings_output.uri);
            }
            Err(e) => {
                eprintln!("⚠️ Failed to upload settings: {}", e);
                eprintln!(" Site was deployed successfully, but settings may need to be configured manually.");
            }
        }
    }

    Ok(())
}

/// Recursively build a Directory from a filesystem path
/// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir)
fn build_directory<'a>(
    agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>,
    dir_path: &'a Path,
    existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
    current_path: String,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>>
{
    Box::pin(async move {
        // Collect all directory entries first
        let dir_entries: Vec<_> = std::fs::read_dir(dir_path)
            .into_diagnostic()?
            .collect::<Result<Vec<_>, _>>()
            .into_diagnostic()?;

        // Separate files and directories
        let mut file_tasks = Vec::new();
        let mut dir_tasks = Vec::new();

        for entry in dir_entries {
            let path = entry.path();
            let name = entry.file_name();
            let name_str = name.to_str()
                .ok_or_else(|| miette::miette!("Invalid filename: {:?}", name))?
                .to_string();

            // Skip unwanted files and directories

            // .git directory (version control - thousands of files)
            if name_str == ".git" {
                continue;
            }

            // .DS_Store (macOS metadata - can leak info)
            if name_str == ".DS_Store" {
                continue;
            }

            // .wisp.metadata.json (wisp internal metadata - should not be uploaded)
            if name_str == ".wisp.metadata.json" {
                continue;
            }

            // .env files (environment variables with secrets)
            if name_str.starts_with(".env") {
                continue;
            }

            // node_modules (dependency folder - can be 100,000+ files)
            if name_str == "node_modules" {
                continue;
            }

            // OS metadata files
            if name_str == "Thumbs.db" || name_str == "desktop.ini" || name_str.starts_with("._") {
                continue;
            }

            // macOS system directories
            if name_str == ".Spotlight-V100" || name_str == ".Trashes" || name_str == ".fseventsd" {
                continue;
            }

            // Cache and temp directories
            if name_str == ".cache" || name_str == ".temp" || name_str == ".tmp" {
                continue;
            }

            // Python cache
            if name_str == "__pycache__" || name_str.ends_with(".pyc") {
                continue;
            }

            // Python virtual environments
            if name_str == ".venv" || name_str == "venv" || name_str == "env" {
                continue;
            }

            // Editor swap files
            if name_str.ends_with(".swp") || name_str.ends_with(".swo") || name_str.ends_with("~") {
                continue;
            }

            let metadata = entry.metadata().into_diagnostic()?;

            if metadata.is_file() {
                // Construct full path for this file (for blob map lookup)
                let full_path = if current_path.is_empty() {
                    name_str.clone()
                } else {
                    format!("{}/{}", current_path, name_str)
                };
                file_tasks.push((name_str, path, full_path));
            } else if metadata.is_dir() {
                dir_tasks.push((name_str, path));
            }
        }

        // Process files concurrently with a limit of 5
        let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks)
            .map(|(name, path, full_path)| async move {
                let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs).await?;
                let entry = Entry::new()
                    .name(CowStr::from(name))
                    .node(EntryNode::File(Box::new(file_node)))
                    .build();
                Ok::<_, miette::Report>((entry, reused))
            })
            .buffer_unordered(5)
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .collect::<miette::Result<Vec<_>>>()?;

        let mut file_entries = Vec::new();
        let mut reused_count = 0;
        let mut total_files = 0;

        for (entry, reused) in file_results {
            file_entries.push(entry);
            total_files += 1;
            if reused {
                reused_count += 1;
            }
        }

        // Process directories recursively (sequentially to avoid too much nesting)
        let mut dir_entries = Vec::new();
        for (name, path) in dir_tasks {
            // Construct full path for subdirectory
            let subdir_path = if current_path.is_empty() {
                name.clone()
            } else {
                format!("{}/{}", current_path, name)
            };
            let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path).await?;
            dir_entries.push(Entry::new()
                .name(CowStr::from(name))
                .node(EntryNode::Directory(Box::new(subdir)))
                .build());
            total_files += sub_total;
            reused_count += sub_reused;
        }

        // Combine file and directory entries
        let mut entries = file_entries;
        entries.extend(dir_entries);

        let directory = Directory::new()
            .r#type(CowStr::from("directory"))
            .entries(entries)
            .build();

        Ok((directory, total_files, reused_count))
    })
}

/// Process a single file: gzip -> base64 -> upload blob (or reuse existing)
/// Returns (File, reused: bool)
/// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup
///
/// Special handling: _redirects files are NOT compressed (uploaded as-is)
async fn process_file(
    agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
    file_path: &Path,
    file_path_key: &str,
    existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
) -> miette::Result<(File<'static>, bool)>
{
    // Read file
    let file_data = std::fs::read(file_path).into_diagnostic()?;

    // Detect original MIME type
    let original_mime = mime_guess::from_path(file_path)
        .first_or_octet_stream()
        .to_string();

    // Check if this is a _redirects file (don't compress it)
    let is_redirects_file = file_path.file_name()
        .and_then(|n| n.to_str())
        .map(|n| n == "_redirects")
        .unwrap_or(false);

    let (upload_bytes, encoding, is_base64) = if is_redirects_file {
        // Don't compress _redirects - upload as-is
        (file_data.clone(), None, false)
    } else {
        // Gzip compress
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&file_data).into_diagnostic()?;
        let gzipped = encoder.finish().into_diagnostic()?;

        // Base64 encode the gzipped data
        let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
        (base64_bytes, Some("gzip"), true)
    };

    // Compute CID for this file
    let file_cid = cid::compute_cid(&upload_bytes);

    // Check if we have an existing blob with the same CID
    let existing_blob = existing_blobs.get(file_path_key);

    if let Some((existing_blob_ref, existing_cid)) = existing_blob {
        if existing_cid == &file_cid {
            // CIDs match - reuse existing blob
            println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid);
            let mut file_builder = File::new()
                .r#type(CowStr::from("file"))
                .blob(existing_blob_ref.clone())
                .mime_type(CowStr::from(original_mime));

            if let Some(enc) = encoding {
                file_builder = file_builder.encoding(CowStr::from(enc));
            }
            if is_base64 {
                file_builder = file_builder.base64(true);
            }

            return Ok((file_builder.build(), true));
        } else {
            // CID mismatch - file changed
            println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid);
        }
    } else {
        // File not in existing blob map
        if file_path_key.starts_with("imgs/") {
            println!(" → New file (not in blob map): {}", file_path_key);
        }
    }

    // File is new or changed - upload it
    let mime_type = if is_redirects_file {
        MimeType::new_static("text/plain")
    } else {
        MimeType::new_static("application/octet-stream")
    };

    println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, upload_bytes.len(), file_cid);
    let blob = agent.upload_blob(upload_bytes, mime_type).await?;

    let mut file_builder = File::new()
        .r#type(CowStr::from("file"))
        .blob(blob)
        .mime_type(CowStr::from(original_mime));

    if let Some(enc) = encoding {
        file_builder = file_builder.encoding(CowStr::from(enc));
    }
    if is_base64 {
        file_builder = file_builder.base64(true);
    }

    Ok((file_builder.build(), false))
}

/// Convert fs::Directory to subfs::Directory
/// They have the same structure, but different types
fn convert_fs_dir_to_subfs_dir(fs_dir: place_wisp::fs::Directory<'static>) -> place_wisp::subfs::Directory<'static> {
    use place_wisp::subfs::{Directory as SubfsDirectory, Entry as SubfsEntry, EntryNode as SubfsEntryNode, File as SubfsFile};

    let subfs_entries: Vec<SubfsEntry> = fs_dir.entries.into_iter().map(|entry| {
        let node = match entry.node {
            place_wisp::fs::EntryNode::File(file) => {
                SubfsEntryNode::File(Box::new(SubfsFile::new()
                    .r#type(file.r#type)
                    .blob(file.blob)
                    .encoding(file.encoding)
                    .mime_type(file.mime_type)
                    .base64(file.base64)
                    .build()))
            }
            place_wisp::fs::EntryNode::Directory(dir) => {
                SubfsEntryNode::Directory(Box::new(convert_fs_dir_to_subfs_dir(*dir)))
            }
            place_wisp::fs::EntryNode::Subfs(subfs) => {
                // Nested subfs in the directory we're converting
                // Note: subfs::Subfs doesn't have the 'flat' field - that's only in fs::Subfs
                SubfsEntryNode::Subfs(Box::new(place_wisp::subfs::Subfs::new()
                    .r#type(subfs.r#type)
                    .subject(subfs.subject)
                    .build()))
            }
            place_wisp::fs::EntryNode::Unknown(unknown) => {
                SubfsEntryNode::Unknown(unknown)
            }
        };

        SubfsEntry::new()
            .name(entry.name)
            .node(node)
            .build()
    }).collect();

    SubfsDirectory::new()
        .r#type(fs_dir.r#type)
        .entries(subfs_entries)
        .build()
}