Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
// Crate modules: record types, CID/blob helpers, and the pull/serve/deploy subsystems.
mod builder_types;
mod place_wisp;
mod cid;
mod blob_map;
mod metadata;
mod download;
mod pull;
mod serve;
mod subfs_utils;
mod redirects;
mod ignore_patterns;

use clap::{Parser, Subcommand};
use jacquard::CowStr;
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
use jacquard::oauth::client::OAuthClient;
use jacquard::oauth::loopback::LoopbackConfig;
use jacquard::prelude::IdentityResolver;
use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
use jacquard_common::types::blob::MimeType;
use miette::IntoDiagnostic;
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use flate2::Compression;
use flate2::write::GzEncoder;
use std::io::Write;
use base64::Engine;
use futures::stream::{self, StreamExt};
use indicatif::{ProgressBar, ProgressStyle, MultiProgress};

use place_wisp::fs::*;
use place_wisp::settings::*;

/// Maximum number of concurrent file uploads to the PDS
const MAX_CONCURRENT_UPLOADS: usize = 2;

/// Top-level CLI arguments.
///
/// Supports two invocation styles: an explicit subcommand (`deploy`, `pull`,
/// `serve`) or a "legacy" bare invocation where the deploy arguments are given
/// directly (handled by the `None` branch in `main`). The bare-mode fields
/// below mirror the `Deploy` subcommand's fields and conflict with `command`.
#[derive(Parser, Debug)]
#[command(author, version, about = "wisp.place CLI tool")]
struct Args {
    #[command(subcommand)]
    command: Option<Commands>,

    // Deploy arguments (when no subcommand is specified)
    /// Handle (e.g., alice.bsky.social), DID, or PDS URL
    #[arg(global = true, conflicts_with = "command")]
    input: Option<CowStr<'static>>,

    /// Path to the directory containing your static site
    #[arg(short, long, global = true, conflicts_with = "command")]
    path: Option<PathBuf>,

    /// Site name (defaults to directory name)
    #[arg(short, long, global = true, conflicts_with = "command")]
    site: Option<String>,

    /// Path to auth store file
    #[arg(long, global = true, conflicts_with = "command")]
    store: Option<String>,

    /// App Password for authentication
    #[arg(long, global = true, conflicts_with = "command")]
    password: Option<CowStr<'static>>,

    /// Enable directory listing mode for paths without index files
    #[arg(long, global = true, conflicts_with = "command")]
    directory: bool,

    /// Enable SPA mode (serve index.html for all routes)
    #[arg(long, global = true, conflicts_with = "command")]
    spa: bool,
}

/// The CLI's subcommands: deploy (default), pull, and serve.
#[derive(Subcommand, Debug)]
enum Commands {
    /// Deploy a static site to wisp.place (default command)
    Deploy {
        /// Handle (e.g., alice.bsky.social), DID, or PDS URL
        input: CowStr<'static>,

        /// Path to the directory containing your static site
        #[arg(short, long, default_value = ".")]
        path: PathBuf,

        /// Site name (defaults to directory name)
        #[arg(short, long)]
        site: Option<String>,

        /// Path to auth store file (will be created if missing, only used with OAuth)
        #[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
        store: String,

        /// App Password for authentication (alternative to OAuth)
        #[arg(long)]
        password: Option<CowStr<'static>>,

        /// Enable directory listing mode for paths without index files
        #[arg(long)]
        directory: bool,

        /// Enable SPA mode (serve index.html for all routes)
        #[arg(long)]
        spa: bool,
    },
    /// Pull a site from the PDS to a local directory
    Pull {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the downloaded site
        #[arg(short, long, default_value = ".")]
        output: PathBuf,
    },
    /// Serve a site locally with real-time firehose updates
    Serve {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the site files
        #[arg(short, long, default_value = ".")]
        output: PathBuf,

        /// Port to serve on
        #[arg(short, long, default_value = "8080")]
        port: u16,
    },
}

#[tokio::main]
137async fn main() -> miette::Result<()> { 138 let args = Args::parse(); 139 140 let result = match args.command { 141 Some(Commands::Deploy { input, path, site, store, password, directory, spa }) => { 142 // Dispatch to appropriate authentication method 143 if let Some(password) = password { 144 run_with_app_password(input, password, path, site, directory, spa).await 145 } else { 146 run_with_oauth(input, store, path, site, directory, spa).await 147 } 148 } 149 Some(Commands::Pull { input, site, output }) => { 150 pull::pull_site(input, CowStr::from(site), output).await 151 } 152 Some(Commands::Serve { input, site, output, port }) => { 153 serve::serve_site(input, CowStr::from(site), output, port).await 154 } 155 None => { 156 // Legacy mode: if input is provided, assume deploy command 157 if let Some(input) = args.input { 158 let path = args.path.unwrap_or_else(|| PathBuf::from(".")); 159 let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string()); 160 161 // Dispatch to appropriate authentication method 162 if let Some(password) = args.password { 163 run_with_app_password(input, password, path, args.site, args.directory, args.spa).await 164 } else { 165 run_with_oauth(input, store, path, args.site, args.directory, args.spa).await 166 } 167 } else { 168 // No command and no input, show help 169 use clap::CommandFactory; 170 Args::command().print_help().into_diagnostic()?; 171 Ok(()) 172 } 173 } 174 }; 175 176 // Force exit to avoid hanging on background tasks/connections 177 match result { 178 Ok(_) => std::process::exit(0), 179 Err(e) => { 180 eprintln!("{:?}", e); 181 std::process::exit(1) 182 } 183 } 184} 185 186/// Run deployment with app password authentication 187async fn run_with_app_password( 188 input: CowStr<'static>, 189 password: CowStr<'static>, 190 path: PathBuf, 191 site: Option<String>, 192 directory: bool, 193 spa: bool, 194) -> miette::Result<()> { 195 let (session, auth) = 196 
MemoryCredentialSession::authenticated(input, password, None, None).await?; 197 println!("Signed in as {}", auth.handle); 198 199 let agent: Agent<_> = Agent::from(session); 200 deploy_site(&agent, path, site, directory, spa).await 201} 202 203/// Run deployment with OAuth authentication 204async fn run_with_oauth( 205 input: CowStr<'static>, 206 store: String, 207 path: PathBuf, 208 site: Option<String>, 209 directory: bool, 210 spa: bool, 211) -> miette::Result<()> { 212 use jacquard::oauth::scopes::Scope; 213 use jacquard::oauth::atproto::AtprotoClientMetadata; 214 use jacquard::oauth::session::ClientData; 215 use url::Url; 216 217 // Request the necessary scopes for wisp.place (including settings) 218 let scopes = Scope::parse_multiple("atproto repo:place.wisp.fs repo:place.wisp.subfs repo:place.wisp.settings blob:*/*") 219 .map_err(|e| miette::miette!("Failed to parse scopes: {:?}", e))?; 220 221 // Create redirect URIs that match the loopback server (port 4000, path /oauth/callback) 222 let redirect_uris = vec![ 223 Url::parse("http://127.0.0.1:4000/oauth/callback").into_diagnostic()?, 224 Url::parse("http://[::1]:4000/oauth/callback").into_diagnostic()?, 225 ]; 226 227 // Create client metadata with matching redirect URIs and scopes 228 let client_data = ClientData { 229 keyset: None, 230 config: AtprotoClientMetadata::new_localhost( 231 Some(redirect_uris), 232 Some(scopes), 233 ), 234 }; 235 236 let oauth = OAuthClient::new(FileAuthStore::new(&store), client_data); 237 238 let session = oauth 239 .login_with_local_server(input, Default::default(), LoopbackConfig::default()) 240 .await?; 241 242 let agent: Agent<_> = Agent::from(session); 243 deploy_site(&agent, path, site, directory, spa).await 244} 245 246/// Deploy the site using the provided agent 247async fn deploy_site( 248 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 249 path: PathBuf, 250 site: Option<String>, 251 directory_listing: bool, 252 spa_mode: bool, 253) -> 
miette::Result<()> { 254 // Verify the path exists 255 if !path.exists() { 256 return Err(miette::miette!("Path does not exist: {}", path.display())); 257 } 258 259 // Get site name 260 let site_name = site.unwrap_or_else(|| { 261 path 262 .file_name() 263 .and_then(|n| n.to_str()) 264 .unwrap_or("site") 265 .to_string() 266 }); 267 268 println!("Deploying site '{}'...", site_name); 269 270 // Try to fetch existing manifest for incremental updates 271 let (existing_blob_map, old_subfs_uris): (HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, Vec<(String, String)>) = { 272 use jacquard_common::types::string::AtUri; 273 274 // Get the DID for this session 275 let session_info = agent.session_info().await; 276 if let Some((did, _)) = session_info { 277 // Construct the AT URI for the record 278 let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); 279 if let Ok(uri) = AtUri::new(&uri_string) { 280 match agent.get_record::<Fs>(&uri).await { 281 Ok(response) => { 282 match response.into_output() { 283 Ok(record_output) => { 284 let existing_manifest = record_output.value; 285 let mut blob_map = blob_map::extract_blob_map(&existing_manifest.root); 286 println!("Found existing manifest with {} files in main record", blob_map.len()); 287 288 // Extract subfs URIs from main record 289 let subfs_uris = subfs_utils::extract_subfs_uris(&existing_manifest.root, String::new()); 290 291 if !subfs_uris.is_empty() { 292 println!("Found {} subfs records, fetching for blob reuse...", subfs_uris.len()); 293 294 // Merge blob maps from all subfs records 295 match subfs_utils::merge_subfs_blob_maps(agent, subfs_uris.clone(), &mut blob_map).await { 296 Ok(merged_count) => { 297 println!("Total blob map: {} files (main + {} from subfs)", blob_map.len(), merged_count); 298 } 299 Err(e) => { 300 eprintln!("⚠️ Failed to merge some subfs blob maps: {}", e); 301 } 302 } 303 304 (blob_map, subfs_uris) 305 } else { 306 (blob_map, Vec::new()) 307 } 308 } 
309 Err(_) => { 310 println!("No existing manifest found, uploading all files..."); 311 (HashMap::new(), Vec::new()) 312 } 313 } 314 } 315 Err(_) => { 316 // Record doesn't exist yet - this is a new site 317 println!("No existing manifest found, uploading all files..."); 318 (HashMap::new(), Vec::new()) 319 } 320 } 321 } else { 322 println!("No existing manifest found (invalid URI), uploading all files..."); 323 (HashMap::new(), Vec::new()) 324 } 325 } else { 326 println!("No existing manifest found (could not get DID), uploading all files..."); 327 (HashMap::new(), Vec::new()) 328 } 329 }; 330 331 // Build directory tree with ignore patterns 332 let ignore_matcher = ignore_patterns::IgnoreMatcher::new(&path)?; 333 334 // Create progress tracking (spinner style since we don't know total count upfront) 335 let multi_progress = MultiProgress::new(); 336 let progress = multi_progress.add(ProgressBar::new_spinner()); 337 progress.set_style( 338 ProgressStyle::default_spinner() 339 .template("[{elapsed_precise}] {spinner:.cyan} {pos} files {msg}") 340 .into_diagnostic()? 
341 .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ") 342 ); 343 progress.set_message("Scanning files..."); 344 progress.enable_steady_tick(std::time::Duration::from_millis(100)); 345 346 let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new(), &ignore_matcher, &progress).await?; 347 let uploaded_count = total_files - reused_count; 348 349 progress.finish_with_message(format!("{} files ({} uploaded, {} reused)", total_files, uploaded_count, reused_count)); 350 351 // Check if we need to split into subfs records 352 const MAX_MANIFEST_SIZE: usize = 140 * 1024; // 140KB (PDS limit is 150KB) 353 const FILE_COUNT_THRESHOLD: usize = 250; // Start splitting at this many files 354 const TARGET_FILE_COUNT: usize = 200; // Keep main manifest under this 355 356 let mut working_directory = root_dir; 357 let mut current_file_count = total_files; 358 let mut new_subfs_uris: Vec<(String, String)> = Vec::new(); 359 360 // Estimate initial manifest size 361 let mut manifest_size = subfs_utils::estimate_directory_size(&working_directory); 362 363 if total_files >= FILE_COUNT_THRESHOLD || manifest_size > MAX_MANIFEST_SIZE { 364 println!("\n⚠️ Large site detected ({} files, {:.1}KB manifest), splitting into subfs records...", 365 total_files, manifest_size as f64 / 1024.0); 366 367 let mut attempts = 0; 368 const MAX_SPLIT_ATTEMPTS: usize = 50; 369 370 while (manifest_size > MAX_MANIFEST_SIZE || current_file_count > TARGET_FILE_COUNT) && attempts < MAX_SPLIT_ATTEMPTS { 371 attempts += 1; 372 373 // Find large directories to split 374 let directories = subfs_utils::find_large_directories(&working_directory, String::new()); 375 376 if let Some(largest_dir) = directories.first() { 377 println!(" Split #{}: {} ({} files, {:.1}KB)", 378 attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0); 379 380 // Check if this directory is itself too large for a single subfs record 381 const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft 
limit for safety 382 let mut subfs_uri = String::new(); 383 384 if largest_dir.size > MAX_SUBFS_SIZE { 385 // Need to split this directory into multiple chunks 386 println!(" → Directory too large, splitting into chunks..."); 387 let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE); 388 println!(" → Created {} chunks", chunks.len()); 389 390 // Upload each chunk as a subfs record 391 let mut chunk_uris = Vec::new(); 392 for (i, chunk) in chunks.iter().enumerate() { 393 use jacquard_common::types::string::Tid; 394 let chunk_tid = Tid::now_0(); 395 let chunk_rkey = chunk_tid.to_string(); 396 397 let chunk_file_count = subfs_utils::count_files_in_directory(chunk); 398 let chunk_size = subfs_utils::estimate_directory_size(chunk); 399 400 let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new() 401 .root(convert_fs_dir_to_subfs_dir(chunk.clone())) 402 .file_count(Some(chunk_file_count as i64)) 403 .created_at(Datetime::now()) 404 .build(); 405 406 println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...", 407 i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0); 408 409 let chunk_output = agent.put_record( 410 RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?), 411 chunk_manifest 412 ).await.into_diagnostic()?; 413 414 let chunk_uri = chunk_output.uri.to_string(); 415 chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 416 new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 417 } 418 419 // Create a parent subfs record that references all chunks 420 // Each chunk reference MUST have flat: true to merge chunk contents 421 println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len()); 422 use jacquard_common::CowStr; 423 use crate::place_wisp::fs::{Subfs}; 424 425 // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs 426 let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, 
_))| { 427 let uri_string = uri.clone(); 428 let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI"); 429 Entry::new() 430 .name(CowStr::from(format!("chunk{}", i))) 431 .node(EntryNode::Subfs(Box::new( 432 Subfs::new() 433 .r#type(CowStr::from("subfs")) 434 .subject(at_uri) 435 .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents 436 .build() 437 ))) 438 .build() 439 }).collect(); 440 441 let parent_root_fs = Directory::new() 442 .r#type(CowStr::from("directory")) 443 .entries(parent_entries_fs) 444 .build(); 445 446 // Convert to subfs::Directory for the parent subfs record 447 let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs); 448 449 use jacquard_common::types::string::Tid; 450 let parent_tid = Tid::now_0(); 451 let parent_rkey = parent_tid.to_string(); 452 453 let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new() 454 .root(parent_root_subfs) 455 .file_count(Some(largest_dir.file_count as i64)) 456 .created_at(Datetime::now()) 457 .build(); 458 459 let parent_output = agent.put_record( 460 RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?), 461 parent_manifest 462 ).await.into_diagnostic()?; 463 464 subfs_uri = parent_output.uri.to_string(); 465 println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri); 466 } else { 467 // Directory fits in a single subfs record 468 use jacquard_common::types::string::Tid; 469 let subfs_tid = Tid::now_0(); 470 let subfs_rkey = subfs_tid.to_string(); 471 472 let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new() 473 .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone())) 474 .file_count(Some(largest_dir.file_count as i64)) 475 .created_at(Datetime::now()) 476 .build(); 477 478 // Upload subfs record 479 let subfs_output = agent.put_record( 480 RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?), 481 subfs_manifest 482 ).await.into_diagnostic()?; 483 484 subfs_uri = subfs_output.uri.to_string(); 485 
println!(" ✅ Created subfs: {}", subfs_uri); 486 } 487 488 // Replace directory with subfs node (flat: false to preserve directory structure) 489 working_directory = subfs_utils::replace_directory_with_subfs( 490 working_directory, 491 &largest_dir.path, 492 &subfs_uri, 493 false // Preserve directory - the chunks inside have flat=true 494 )?; 495 496 new_subfs_uris.push((subfs_uri, largest_dir.path.clone())); 497 current_file_count -= largest_dir.file_count; 498 499 // Recalculate manifest size 500 manifest_size = subfs_utils::estimate_directory_size(&working_directory); 501 println!(" → Manifest now {:.1}KB with {} files ({} subfs total)", 502 manifest_size as f64 / 1024.0, current_file_count, new_subfs_uris.len()); 503 504 if manifest_size <= MAX_MANIFEST_SIZE && current_file_count <= TARGET_FILE_COUNT { 505 println!("✅ Manifest now fits within limits"); 506 break; 507 } 508 } else { 509 println!(" No more subdirectories to split - stopping"); 510 break; 511 } 512 } 513 514 if attempts >= MAX_SPLIT_ATTEMPTS { 515 return Err(miette::miette!( 516 "Exceeded maximum split attempts ({}). 
Manifest still too large: {:.1}KB with {} files", 517 MAX_SPLIT_ATTEMPTS, 518 manifest_size as f64 / 1024.0, 519 current_file_count 520 )); 521 } 522 523 println!("✅ Split complete: {} subfs records, {} files in main manifest, {:.1}KB", 524 new_subfs_uris.len(), current_file_count, manifest_size as f64 / 1024.0); 525 } else { 526 println!("Manifest created ({} files, {:.1}KB) - no splitting needed", 527 total_files, manifest_size as f64 / 1024.0); 528 } 529 530 // Create the final Fs record 531 let fs_record = Fs::new() 532 .site(CowStr::from(site_name.clone())) 533 .root(working_directory) 534 .file_count(current_file_count as i64) 535 .created_at(Datetime::now()) 536 .build(); 537 538 // Use site name as the record key 539 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?; 540 let output = agent.put_record(RecordKey::from(rkey), fs_record).await?; 541 542 // Extract DID from the AT URI (format: at://did:plc:xxx/collection/rkey) 543 let uri_str = output.uri.to_string(); 544 let did = uri_str 545 .strip_prefix("at://") 546 .and_then(|s| s.split('/').next()) 547 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; 548 549 println!("\n✓ Deployed site '{}': {}", site_name, output.uri); 550 println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); 551 println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); 552 553 // Clean up old subfs records 554 if !old_subfs_uris.is_empty() { 555 println!("\nCleaning up {} old subfs records...", old_subfs_uris.len()); 556 557 let mut deleted_count = 0; 558 let mut failed_count = 0; 559 560 for (uri, _path) in old_subfs_uris { 561 match subfs_utils::delete_subfs_record(agent, &uri).await { 562 Ok(_) => { 563 deleted_count += 1; 564 println!(" 🗑️ Deleted old subfs: {}", uri); 565 } 566 Err(e) => { 567 failed_count += 1; 568 eprintln!(" ⚠️ Failed to delete {}: {}", uri, e); 569 } 570 } 571 } 572 573 if failed_count > 0 { 574 
eprintln!("⚠️ Cleanup completed with {} deleted, {} failed", deleted_count, failed_count); 575 } else { 576 println!("✅ Cleanup complete: {} old subfs records deleted", deleted_count); 577 } 578 } 579 580 // Upload settings if either flag is set 581 if directory_listing || spa_mode { 582 // Validate mutual exclusivity 583 if directory_listing && spa_mode { 584 return Err(miette::miette!("Cannot enable both --directory and --SPA modes")); 585 } 586 587 println!("\n⚙️ Uploading site settings..."); 588 589 // Build settings record 590 let mut settings_builder = Settings::new(); 591 592 if directory_listing { 593 settings_builder = settings_builder.directory_listing(Some(true)); 594 println!(" • Directory listing: enabled"); 595 } 596 597 if spa_mode { 598 settings_builder = settings_builder.spa_mode(Some(CowStr::from("index.html"))); 599 println!(" • SPA mode: enabled (serving index.html for all routes)"); 600 } 601 602 let settings_record = settings_builder.build(); 603 604 // Upload settings record with same rkey as site 605 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?; 606 match agent.put_record(RecordKey::from(rkey), settings_record).await { 607 Ok(settings_output) => { 608 println!("✅ Settings uploaded: {}", settings_output.uri); 609 } 610 Err(e) => { 611 eprintln!("⚠️ Failed to upload settings: {}", e); 612 eprintln!(" Site was deployed successfully, but settings may need to be configured manually."); 613 } 614 } 615 } 616 617 Ok(()) 618} 619 620/// Recursively build a Directory from a filesystem path 621/// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir) 622fn build_directory<'a>( 623 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>, 624 dir_path: &'a Path, 625 existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 626 current_path: String, 627 ignore_matcher: &'a ignore_patterns::IgnoreMatcher, 628 
    progress: &'a ProgressBar,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>>
// Returns (directory node, total file count, reused-blob count).
// Boxed future because an async fn cannot recurse directly.
{
    Box::pin(async move {
        // Collect all directory entries first
        let dir_entries: Vec<_> = std::fs::read_dir(dir_path)
            .into_diagnostic()?
            .collect::<Result<Vec<_>, _>>()
            .into_diagnostic()?;

        // Separate files and directories
        let mut file_tasks = Vec::new();
        let mut dir_tasks = Vec::new();

        for entry in dir_entries {
            let path = entry.path();
            let name = entry.file_name();
            // Non-UTF-8 file names are treated as an error rather than skipped
            let name_str = name.to_str()
                .ok_or_else(|| miette::miette!("Invalid filename: {:?}", name))?
                .to_string();

            // Construct full path for ignore checking
            let full_path = if current_path.is_empty() {
                name_str.clone()
            } else {
                format!("{}/{}", current_path, name_str)
            };

            // Skip files/directories that match ignore patterns
            if ignore_matcher.is_ignored(&full_path) || ignore_matcher.is_filename_ignored(&name_str) {
                continue;
            }

            let metadata = entry.metadata().into_diagnostic()?;

            if metadata.is_file() {
                // Construct full path for this file (for blob map lookup)
                let full_path = if current_path.is_empty() {
                    name_str.clone()
                } else {
                    format!("{}/{}", current_path, name_str)
                };
                file_tasks.push((name_str, path, full_path));
            } else if metadata.is_dir() {
                dir_tasks.push((name_str, path));
            }
            // NOTE(review): symlinks (neither is_file nor is_dir) are silently
            // skipped here — confirm that is intentional.
        }

        // Process files concurrently with a limit of 2
        // (buffer_unordered caps in-flight uploads at MAX_CONCURRENT_UPLOADS)
        let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks)
            .map(|(name, path, full_path)| async move {
                let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs, progress).await?;
                progress.inc(1);
                let entry = Entry::new()
                    .name(CowStr::from(name))
                    .node(EntryNode::File(Box::new(file_node)))
                    .build();
                Ok::<_, miette::Report>((entry, reused))
            })
            .buffer_unordered(MAX_CONCURRENT_UPLOADS)
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .collect::<miette::Result<Vec<_>>>()?;

        let mut file_entries = Vec::new();
        let mut reused_count = 0;
        let mut total_files = 0;

        for (entry, reused) in file_results {
            file_entries.push(entry);
            total_files += 1;
            if reused {
                reused_count += 1;
            }
        }

        // Process directories recursively (sequentially to avoid too much nesting)
        let mut dir_entries = Vec::new();
        for (name, path) in dir_tasks {
            // Construct full path for subdirectory
            let subdir_path = if current_path.is_empty() {
                name.clone()
            } else {
                format!("{}/{}", current_path, name)
            };
            let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path, ignore_matcher, progress).await?;
            dir_entries.push(Entry::new()
                .name(CowStr::from(name))
                .node(EntryNode::Directory(Box::new(subdir)))
                .build());
            total_files += sub_total;
            reused_count += sub_reused;
        }

        // Combine file and directory entries (files first, then directories)
        let mut entries = file_entries;
        entries.extend(dir_entries);

        let directory = Directory::new()
            .r#type(CowStr::from("directory"))
            .entries(entries)
            .build();

        Ok((directory, total_files, reused_count))
    })
}

/// Process a single file: gzip -> base64 -> upload blob (or reuse existing)
/// Returns (File, reused: bool)
/// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup
///
/// Special handling: _redirects files are NOT compressed (uploaded as-is)
async fn process_file(
    agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
    file_path: &Path,
    file_path_key: &str,
    existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
    progress: &ProgressBar,
) -> miette::Result<(File<'static>, bool)>
{
    // Read file
    let file_data = std::fs::read(file_path).into_diagnostic()?;

    // Detect original MIME type (recorded in the manifest; the blob itself is
    // uploaded as octet-stream since it is gzipped+base64-encoded)
    let original_mime = mime_guess::from_path(file_path)
        .first_or_octet_stream()
        .to_string();

    // Check if this is a _redirects file (don't compress it)
    let is_redirects_file = file_path.file_name()
        .and_then(|n| n.to_str())
        .map(|n| n == "_redirects")
        .unwrap_or(false);

    let (upload_bytes, encoding, is_base64) = if is_redirects_file {
        // Don't compress _redirects - upload as-is
        (file_data.clone(), None, false)
    } else {
        // Gzip compress
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&file_data).into_diagnostic()?;
        let gzipped = encoder.finish().into_diagnostic()?;

        // Base64 encode the gzipped data
        let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
        (base64_bytes, Some("gzip"), true)
    };

    // Compute CID for this file (over the encoded upload bytes, so it is
    // directly comparable with CIDs stored in the existing blob map)
    let file_cid = cid::compute_cid(&upload_bytes);

    // Check if we have an existing blob with the same CID
    let existing_blob = existing_blobs.get(file_path_key);

    if let Some((existing_blob_ref, existing_cid)) = existing_blob {
        if existing_cid == &file_cid {
            // CIDs match - reuse existing blob, skip the upload entirely
            progress.set_message(format!("✓ Reused {}", file_path_key));
            let mut file_builder = File::new()
                .r#type(CowStr::from("file"))
                .blob(existing_blob_ref.clone())
                .mime_type(CowStr::from(original_mime));

            if let Some(enc) = encoding {
                file_builder = file_builder.encoding(CowStr::from(enc));
            }
            if is_base64 {
                file_builder = file_builder.base64(true);
            }

            return Ok((file_builder.build(), true));
        }
    }

    // File is new or changed - upload it
    let mime_type = if is_redirects_file {
        MimeType::new_static("text/plain")
    } else {
        MimeType::new_static("application/octet-stream")
    };

    // Format file size nicely
    let size_str = if upload_bytes.len() < 1024 {
        format!("{} B", upload_bytes.len())
    } else if upload_bytes.len() < 1024 * 1024 {
        format!("{:.1} KB", upload_bytes.len() as f64 / 1024.0)
    } else {
        format!("{:.1} MB", upload_bytes.len() as f64 / (1024.0 * 1024.0))
    };

    progress.set_message(format!("↑ Uploading {} ({})", file_path_key, size_str));
    let blob = agent.upload_blob(upload_bytes, mime_type).await?;
    progress.set_message(format!("✓ Uploaded {}", file_path_key));

    let mut file_builder = File::new()
        .r#type(CowStr::from("file"))
        .blob(blob)
        .mime_type(CowStr::from(original_mime));

    if let Some(enc) = encoding {
        file_builder = file_builder.encoding(CowStr::from(enc));
    }
    if is_base64 {
        file_builder = file_builder.base64(true);
    }

    Ok((file_builder.build(), false))
}

/// Convert fs::Directory to subfs::Directory
/// They have the same structure, but different types
fn convert_fs_dir_to_subfs_dir(fs_dir: place_wisp::fs::Directory<'static>) -> place_wisp::subfs::Directory<'static> {
    use place_wisp::subfs::{Directory as SubfsDirectory, Entry as SubfsEntry, EntryNode as SubfsEntryNode, File as SubfsFile};

    let subfs_entries: Vec<SubfsEntry> = fs_dir.entries.into_iter().map(|entry| {
        let node = match entry.node {
            place_wisp::fs::EntryNode::File(file) => {
                // Field-by-field copy into the subfs File type
                SubfsEntryNode::File(Box::new(SubfsFile::new()
                    .r#type(file.r#type)
                    .blob(file.blob)
                    .encoding(file.encoding)
                    .mime_type(file.mime_type)
                    .base64(file.base64)
                    .build()))
            }
            place_wisp::fs::EntryNode::Directory(dir) => {
                // Recurse into subdirectories
                SubfsEntryNode::Directory(Box::new(convert_fs_dir_to_subfs_dir(*dir)))
            }
            place_wisp::fs::EntryNode::Subfs(subfs) => {
                // Nested subfs in the directory we're converting
                // Note: subfs::Subfs doesn't have the 'flat' field - that's only in fs::Subfs
                SubfsEntryNode::Subfs(Box::new(place_wisp::subfs::Subfs::new()
                    .r#type(subfs.r#type)
                    .subject(subfs.subject)
                    .build()))
            }
            place_wisp::fs::EntryNode::Unknown(unknown) => {
                // Pass through unrecognized nodes unchanged
                SubfsEntryNode::Unknown(unknown)
            }
        };

        SubfsEntry::new()
            .name(entry.name)
            .node(node)
            .build()
    }).collect();

    SubfsDirectory::new()
        .r#type(fs_dir.r#type)
        .entries(subfs_entries)
        .build()
}