Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
wisp.place
mod builder_types;
mod place_wisp;
mod cid;
mod blob_map;
mod metadata;
mod download;
mod pull;
mod serve;
mod subfs_utils;
mod redirects;

use clap::{Parser, Subcommand};
use jacquard::CowStr;
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
use jacquard::oauth::client::OAuthClient;
use jacquard::oauth::loopback::LoopbackConfig;
use jacquard::prelude::IdentityResolver;
use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
use jacquard_common::types::blob::MimeType;
use miette::IntoDiagnostic;
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use flate2::Compression;
use flate2::write::GzEncoder;
use std::io::Write;
use base64::Engine;
use futures::stream::{self, StreamExt};

use place_wisp::fs::*;
use place_wisp::settings::*;

#[derive(Parser, Debug)]
#[command(author, version, about = "wisp.place CLI tool")]
struct Args {
    #[command(subcommand)]
    command: Option<Commands>,

    // Deploy arguments (when no subcommand is specified)
    /// Handle (e.g., alice.bsky.social), DID, or PDS URL
    #[arg(global = true, conflicts_with = "command")]
    input: Option<CowStr<'static>>,

    /// Path to the directory containing your static site
    #[arg(short, long, global = true, conflicts_with = "command")]
    path: Option<PathBuf>,

    /// Site name (defaults to directory name)
    #[arg(short, long, global = true, conflicts_with = "command")]
    site: Option<String>,

    /// Path to auth store file
    #[arg(long, global = true, conflicts_with = "command")]
    store: Option<String>,

    /// App Password for authentication
    #[arg(long, global = true, conflicts_with = "command")]
    password: Option<CowStr<'static>>,

    /// Enable directory listing mode for paths without index files
    #[arg(long, global = true, conflicts_with = "command")]
    directory: bool,

    /// Enable SPA mode (serve index.html for all routes)
    #[arg(long, global = true, conflicts_with = "command")]
    spa: bool,
}

#[derive(Subcommand, Debug)]
enum Commands {
    /// Deploy a static site to wisp.place (default command)
    Deploy {
        /// Handle (e.g., alice.bsky.social), DID, or PDS URL
        input: CowStr<'static>,

        /// Path to the directory containing your static site
        #[arg(short, long, default_value = ".")]
        path: PathBuf,

        /// Site name (defaults to directory name)
        #[arg(short, long)]
        site: Option<String>,

        /// Path to auth store file (will be created if missing, only used with OAuth)
        #[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
        store: String,

        /// App Password for authentication (alternative to OAuth)
        #[arg(long)]
        password: Option<CowStr<'static>>,

        /// Enable directory listing mode for paths without index files
        #[arg(long)]
        directory: bool,

        /// Enable SPA mode (serve index.html for all routes)
        #[arg(long)]
        spa: bool,
    },
    /// Pull a site from the PDS to a local directory
    Pull {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the downloaded site
        #[arg(short, long, default_value = ".")]
        output: PathBuf,
    },
    /// Serve a site locally with real-time firehose updates
    Serve {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the site files
        #[arg(short, long, default_value = ".")]
        output: PathBuf,

        /// Port to serve on
        #[arg(short, long, default_value = "8080")]
        port: u16,
    },
}
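
// Example invocations for the CLI defined above (illustrative; the binary name
// `wisp` is an assumption and may differ in this repo):
//
//   wisp deploy alice.bsky.social --path ./dist --site blog
//   wisp deploy alice.bsky.social --password <app-password> --spa
//   wisp pull alice.bsky.social --site blog --output ./blog
//   wisp serve alice.bsky.social --site blog --port 8080
//
// Running with no subcommand falls back to the legacy deploy path handled in
// `main` below.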

#[tokio::main]
async fn main() -> miette::Result<()> {
    let args = Args::parse();

    let result = match args.command {
        Some(Commands::Deploy { input, path, site, store, password, directory, spa }) => {
            // Dispatch to appropriate authentication method
            if let Some(password) = password {
                run_with_app_password(input, password, path, site, directory, spa).await
            } else {
                run_with_oauth(input, store, path, site, directory, spa).await
            }
        }
        Some(Commands::Pull { input, site, output }) => {
            pull::pull_site(input, CowStr::from(site), output).await
        }
        Some(Commands::Serve { input, site, output, port }) => {
            serve::serve_site(input, CowStr::from(site), output, port).await
        }
        None => {
            // Legacy mode: if input is provided, assume deploy command
            if let Some(input) = args.input {
                let path = args.path.unwrap_or_else(|| PathBuf::from("."));
                let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string());

                // Dispatch to appropriate authentication method
                if let Some(password) = args.password {
                    run_with_app_password(input, password, path, args.site, args.directory, args.spa).await
                } else {
                    run_with_oauth(input, store, path, args.site, args.directory, args.spa).await
                }
            } else {
                // No command and no input, show help
                use clap::CommandFactory;
                Args::command().print_help().into_diagnostic()?;
                Ok(())
            }
        }
    };

    // Force exit to avoid hanging on background tasks/connections
    match result {
        Ok(_) => std::process::exit(0),
        Err(e) => {
            eprintln!("{:?}", e);
            std::process::exit(1)
        }
    }
}

/// Run deployment with app password authentication
async fn run_with_app_password(
    input: CowStr<'static>,
    password: CowStr<'static>,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    let (session, auth) =
        MemoryCredentialSession::authenticated(input, password, None, None).await?;
    println!("Signed in as {}", auth.handle);

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}

/// Run deployment with OAuth authentication
async fn run_with_oauth(
    input: CowStr<'static>,
    store: String,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    use jacquard::oauth::scopes::Scope;
    use jacquard::oauth::atproto::AtprotoClientMetadata;
    use jacquard::oauth::session::ClientData;
    use url::Url;

    // Request the necessary scopes for wisp.place (including settings)
    let scopes = Scope::parse_multiple("atproto repo:place.wisp.fs repo:place.wisp.subfs repo:place.wisp.settings blob:*/*")
        .map_err(|e| miette::miette!("Failed to parse scopes: {:?}", e))?;

    // Create redirect URIs that match the loopback server (port 4000, path /oauth/callback)
    let redirect_uris = vec![
        Url::parse("http://127.0.0.1:4000/oauth/callback").into_diagnostic()?,
        Url::parse("http://[::1]:4000/oauth/callback").into_diagnostic()?,
    ];

    // Create client metadata with matching redirect URIs and scopes
    let client_data = ClientData {
        keyset: None,
        config: AtprotoClientMetadata::new_localhost(
            Some(redirect_uris),
            Some(scopes),
        ),
    };

    let oauth = OAuthClient::new(FileAuthStore::new(&store), client_data);

    let session = oauth
        .login_with_local_server(input, Default::default(), LoopbackConfig::default())
        .await?;

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}
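
// Note on the requested OAuth scopes (an illustrative reading, based on how the
// collections are used in this file rather than on the scope spec itself):
//   repo:place.wisp.fs       - write the main site manifest records
//   repo:place.wisp.subfs    - write split-out subfs manifest records
//   repo:place.wisp.settings - write per-site settings records
//   blob:*/*                 - upload file blobs of any MIME type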

/// Deploy the site using the provided agent
async fn deploy_site(
    agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
    path: PathBuf,
    site: Option<String>,
    directory_listing: bool,
    spa_mode: bool,
) -> miette::Result<()> {
    // Verify the path exists
    if !path.exists() {
        return Err(miette::miette!("Path does not exist: {}", path.display()));
    }

    // Get site name
    let site_name = site.unwrap_or_else(|| {
        path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("site")
            .to_string()
    });

    println!("Deploying site '{}'...", site_name);

    // Try to fetch existing manifest for incremental updates
    let (existing_blob_map, old_subfs_uris): (HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, Vec<(String, String)>) = {
        use jacquard_common::types::string::AtUri;

        // Get the DID for this session
        let session_info = agent.session_info().await;
        if let Some((did, _)) = session_info {
            // Construct the AT URI for the record
            let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name);
            if let Ok(uri) = AtUri::new(&uri_string) {
                match agent.get_record::<Fs>(&uri).await {
                    Ok(response) => {
                        match response.into_output() {
                            Ok(record_output) => {
                                let existing_manifest = record_output.value;
                                let mut blob_map = blob_map::extract_blob_map(&existing_manifest.root);
                                println!("Found existing manifest with {} files in main record", blob_map.len());

                                // Extract subfs URIs from main record
                                let subfs_uris = subfs_utils::extract_subfs_uris(&existing_manifest.root, String::new());

                                if !subfs_uris.is_empty() {
                                    println!("Found {} subfs records, fetching for blob reuse...", subfs_uris.len());

                                    // Merge blob maps from all subfs records
                                    match subfs_utils::merge_subfs_blob_maps(agent, subfs_uris.clone(), &mut blob_map).await {
                                        Ok(merged_count) => {
                                            println!("Total blob map: {} files (main + {} from subfs)", blob_map.len(), merged_count);
                                        }
                                        Err(e) => {
                                            eprintln!("⚠️ Failed to merge some subfs blob maps: {}", e);
                                        }
                                    }

                                    (blob_map, subfs_uris)
                                } else {
                                    (blob_map, Vec::new())
                                }
                            }
                            Err(_) => {
                                println!("No existing manifest found, uploading all files...");
                                (HashMap::new(), Vec::new())
                            }
                        }
                    }
                    Err(_) => {
                        // Record doesn't exist yet - this is a new site
                        println!("No existing manifest found, uploading all files...");
                        (HashMap::new(), Vec::new())
                    }
                }
            } else {
                println!("No existing manifest found (invalid URI), uploading all files...");
                (HashMap::new(), Vec::new())
            }
        } else {
            println!("No existing manifest found (could not get DID), uploading all files...");
            (HashMap::new(), Vec::new())
        }
    };
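
    // Shape of the reuse map built above (illustrative entry; the exact CID
    // string format depends on cid::compute_cid):
    //   "assets/app.js" -> (BlobRef of the previously uploaded blob, "<cid>")
    // Keys are site-root-relative paths, so a file is only re-uploaded when its
    // path is new or its content CID has changed.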

    // Build directory tree
    let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new()).await?;
    let uploaded_count = total_files - reused_count;

    // Check if we need to split into subfs records
    const MAX_MANIFEST_SIZE: usize = 140 * 1024; // 140KB (PDS limit is 150KB)
    const FILE_COUNT_THRESHOLD: usize = 250; // Start splitting at this many files
    const TARGET_FILE_COUNT: usize = 200; // Keep main manifest under this

    let mut working_directory = root_dir;
    let mut current_file_count = total_files;
    let mut new_subfs_uris: Vec<(String, String)> = Vec::new();

    // Estimate initial manifest size
    let mut manifest_size = subfs_utils::estimate_directory_size(&working_directory);

    if total_files >= FILE_COUNT_THRESHOLD || manifest_size > MAX_MANIFEST_SIZE {
        println!("\n⚠️ Large site detected ({} files, {:.1}KB manifest), splitting into subfs records...",
            total_files, manifest_size as f64 / 1024.0);

        let mut attempts = 0;
        const MAX_SPLIT_ATTEMPTS: usize = 50;

        while (manifest_size > MAX_MANIFEST_SIZE || current_file_count > TARGET_FILE_COUNT) && attempts < MAX_SPLIT_ATTEMPTS {
            attempts += 1;

            // Find large directories to split
            let directories = subfs_utils::find_large_directories(&working_directory, String::new());

            if let Some(largest_dir) = directories.first() {
                println!(" Split #{}: {} ({} files, {:.1}KB)",
                    attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0);

                // Check if this directory is itself too large for a single subfs record
                const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety
                let mut subfs_uri = String::new();

                if largest_dir.size > MAX_SUBFS_SIZE {
                    // Need to split this directory into multiple chunks
                    println!(" → Directory too large, splitting into chunks...");
                    let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE);
                    println!(" → Created {} chunks", chunks.len());

                    // Upload each chunk as a subfs record
                    let mut chunk_uris = Vec::new();
                    for (i, chunk) in chunks.iter().enumerate() {
                        use jacquard_common::types::string::Tid;
                        let chunk_tid = Tid::now_0();
                        let chunk_rkey = chunk_tid.to_string();

                        let chunk_file_count = subfs_utils::count_files_in_directory(chunk);
                        let chunk_size = subfs_utils::estimate_directory_size(chunk);

                        let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                            .root(convert_fs_dir_to_subfs_dir(chunk.clone()))
                            .file_count(Some(chunk_file_count as i64))
                            .created_at(Datetime::now())
                            .build();

                        println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...",
                            i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0);

                        let chunk_output = agent.put_record(
                            RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?),
                            chunk_manifest
                        ).await.into_diagnostic()?;

                        let chunk_uri = chunk_output.uri.to_string();
                        chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
                        new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
                    }

                    // Create a parent subfs record that references all chunks
                    // Each chunk reference MUST have flat: true to merge chunk contents
                    println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len());
                    use jacquard_common::CowStr;
                    use crate::place_wisp::fs::{Subfs};

                    // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs
                    let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| {
                        let uri_string = uri.clone();
                        let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI");
                        Entry::new()
                            .name(CowStr::from(format!("chunk{}", i)))
                            .node(EntryNode::Subfs(Box::new(
                                Subfs::new()
                                    .r#type(CowStr::from("subfs"))
                                    .subject(at_uri)
                                    .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents
                                    .build()
                            )))
                            .build()
                    }).collect();

                    let parent_root_fs = Directory::new()
                        .r#type(CowStr::from("directory"))
                        .entries(parent_entries_fs)
                        .build();

                    // Convert to subfs::Directory for the parent subfs record
                    let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs);

                    use jacquard_common::types::string::Tid;
                    let parent_tid = Tid::now_0();
                    let parent_rkey = parent_tid.to_string();

                    let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                        .root(parent_root_subfs)
                        .file_count(Some(largest_dir.file_count as i64))
                        .created_at(Datetime::now())
                        .build();

                    let parent_output = agent.put_record(
                        RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?),
                        parent_manifest
                    ).await.into_diagnostic()?;

                    subfs_uri = parent_output.uri.to_string();
                    println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri);
                } else {
                    // Directory fits in a single subfs record
                    use jacquard_common::types::string::Tid;
                    let subfs_tid = Tid::now_0();
                    let subfs_rkey = subfs_tid.to_string();

                    let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
                        .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
                        .file_count(Some(largest_dir.file_count as i64))
                        .created_at(Datetime::now())
                        .build();

                    // Upload subfs record
                    let subfs_output = agent.put_record(
                        RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
                        subfs_manifest
                    ).await.into_diagnostic()?;

                    subfs_uri = subfs_output.uri.to_string();
                    println!(" ✅ Created subfs: {}", subfs_uri);
                }

                // Replace directory with subfs node (flat: false to preserve directory structure)
                working_directory = subfs_utils::replace_directory_with_subfs(
                    working_directory,
                    &largest_dir.path,
                    &subfs_uri,
                    false // Preserve directory - the chunks inside have flat=true
                )?;

                new_subfs_uris.push((subfs_uri, largest_dir.path.clone()));
                current_file_count -= largest_dir.file_count;

                // Recalculate manifest size
                manifest_size = subfs_utils::estimate_directory_size(&working_directory);
                println!(" → Manifest now {:.1}KB with {} files ({} subfs total)",
                    manifest_size as f64 / 1024.0, current_file_count, new_subfs_uris.len());

                if manifest_size <= MAX_MANIFEST_SIZE && current_file_count <= TARGET_FILE_COUNT {
                    println!("✅ Manifest now fits within limits");
                    break;
                }
            } else {
                println!(" No more subdirectories to split - stopping");
                break;
            }
        }

        if attempts >= MAX_SPLIT_ATTEMPTS {
            return Err(miette::miette!(
                "Exceeded maximum split attempts ({}). Manifest still too large: {:.1}KB with {} files",
                MAX_SPLIT_ATTEMPTS,
                manifest_size as f64 / 1024.0,
                current_file_count
            ));
        }

        println!("✅ Split complete: {} subfs records, {} files in main manifest, {:.1}KB",
            new_subfs_uris.len(), current_file_count, manifest_size as f64 / 1024.0);
    } else {
        println!("Manifest created ({} files, {:.1}KB) - no splitting needed",
            total_files, manifest_size as f64 / 1024.0);
    }
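
    // Worked example of the thresholds above (illustrative): a site with 600
    // files trips FILE_COUNT_THRESHOLD (250), so the largest directories are
    // moved into place.wisp.subfs records until the main manifest holds at most
    // TARGET_FILE_COUNT (200) files and stays under MAX_MANIFEST_SIZE (140KB).
    // Any single directory whose own manifest would exceed the 75KB soft limit
    // is further split into chunks, each referenced with flat = true so their
    // contents merge back into one logical directory.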

    // Create the final Fs record
    let fs_record = Fs::new()
        .site(CowStr::from(site_name.clone()))
        .root(working_directory)
        .file_count(current_file_count as i64)
        .created_at(Datetime::now())
        .build();

    // Use site name as the record key
    let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
    let output = agent.put_record(RecordKey::from(rkey), fs_record).await?;

    // Extract DID from the AT URI (format: at://did:plc:xxx/collection/rkey)
    let uri_str = output.uri.to_string();
    let did = uri_str
        .strip_prefix("at://")
        .and_then(|s| s.split('/').next())
        .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?;

    println!("\n✓ Deployed site '{}': {}", site_name, output.uri);
    println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count);
    println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name);

    // Clean up old subfs records
    if !old_subfs_uris.is_empty() {
        println!("\nCleaning up {} old subfs records...", old_subfs_uris.len());

        let mut deleted_count = 0;
        let mut failed_count = 0;

        for (uri, _path) in old_subfs_uris {
            match subfs_utils::delete_subfs_record(agent, &uri).await {
                Ok(_) => {
                    deleted_count += 1;
                    println!(" 🗑️ Deleted old subfs: {}", uri);
                }
                Err(e) => {
                    failed_count += 1;
                    eprintln!(" ⚠️ Failed to delete {}: {}", uri, e);
                }
            }
        }

        if failed_count > 0 {
            eprintln!("⚠️ Cleanup completed with {} deleted, {} failed", deleted_count, failed_count);
        } else {
            println!("✅ Cleanup complete: {} old subfs records deleted", deleted_count);
        }
    }

    // Upload settings if either flag is set
    if directory_listing || spa_mode {
        // Validate mutual exclusivity
        if directory_listing && spa_mode {
            return Err(miette::miette!("Cannot enable both --directory and --spa modes"));
        }

        println!("\n⚙️ Uploading site settings...");

        // Build settings record
        let mut settings_builder = Settings::new();

        if directory_listing {
            settings_builder = settings_builder.directory_listing(Some(true));
            println!(" • Directory listing: enabled");
        }

        if spa_mode {
            settings_builder = settings_builder.spa_mode(Some(CowStr::from("index.html")));
            println!(" • SPA mode: enabled (serving index.html for all routes)");
        }

        let settings_record = settings_builder.build();

        // Upload settings record with same rkey as site
        let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
        match agent.put_record(RecordKey::from(rkey), settings_record).await {
            Ok(settings_output) => {
                println!("✅ Settings uploaded: {}", settings_output.uri);
            }
            Err(e) => {
                eprintln!("⚠️ Failed to upload settings: {}", e);
                eprintln!(" Site was deployed successfully, but settings may need to be configured manually.");
            }
        }
    }

    Ok(())
}
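
// Resulting record layout for a deployed site (illustrative URIs; the DID shown
// is a placeholder):
//   at://did:plc:example/place.wisp.fs/<site-name>        - main manifest, rkey = site name
//   at://did:plc:example/place.wisp.subfs/<tid>           - optional split-out subtrees, rkey = TID
//   at://did:plc:example/place.wisp.settings/<site-name>  - optional settings (directory listing / SPA)
// The deployed site is reachable at the "Available at" URL printed above,
// https://sites.wisp.place/<did>/<site-name>.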

/// Recursively build a Directory from a filesystem path
/// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir)
/// Returns a boxed future so the async function can recurse into subdirectories.
fn build_directory<'a>(
    agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>,
    dir_path: &'a Path,
    existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
    current_path: String,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>>
{
    Box::pin(async move {
        // Collect all directory entries first
        let dir_entries: Vec<_> = std::fs::read_dir(dir_path)
            .into_diagnostic()?
            .collect::<Result<Vec<_>, _>>()
            .into_diagnostic()?;

        // Separate files and directories
        let mut file_tasks = Vec::new();
        let mut dir_tasks = Vec::new();

        for entry in dir_entries {
            let path = entry.path();
            let name = entry.file_name();
            let name_str = name.to_str()
                .ok_or_else(|| miette::miette!("Invalid filename: {:?}", name))?
                .to_string();

            // Skip unwanted files and directories

            // .git directory (version control - thousands of files)
            if name_str == ".git" {
                continue;
            }

            // .DS_Store (macOS metadata - can leak info)
            if name_str == ".DS_Store" {
                continue;
            }

            // .wisp.metadata.json (wisp internal metadata - should not be uploaded)
            if name_str == ".wisp.metadata.json" {
                continue;
            }

            // .env files (environment variables with secrets)
            if name_str.starts_with(".env") {
                continue;
            }

            // node_modules (dependency folder - can be 100,000+ files)
            if name_str == "node_modules" {
                continue;
            }

            // OS metadata files
            if name_str == "Thumbs.db" || name_str == "desktop.ini" || name_str.starts_with("._") {
                continue;
            }

            // macOS system directories
            if name_str == ".Spotlight-V100" || name_str == ".Trashes" || name_str == ".fseventsd" {
                continue;
            }

            // Cache and temp directories
            if name_str == ".cache" || name_str == ".temp" || name_str == ".tmp" {
                continue;
            }

            // Python cache
            if name_str == "__pycache__" || name_str.ends_with(".pyc") {
                continue;
            }

            // Python virtual environments
            if name_str == ".venv" || name_str == "venv" || name_str == "env" {
                continue;
            }

            // Editor swap files
            if name_str.ends_with(".swp") || name_str.ends_with(".swo") || name_str.ends_with("~") {
                continue;
            }

            let metadata = entry.metadata().into_diagnostic()?;

            if metadata.is_file() {
                // Construct full path for this file (for blob map lookup)
                let full_path = if current_path.is_empty() {
                    name_str.clone()
                } else {
                    format!("{}/{}", current_path, name_str)
                };
                file_tasks.push((name_str, path, full_path));
            } else if metadata.is_dir() {
                dir_tasks.push((name_str, path));
            }
        }

        // Process files concurrently with a limit of 5
        let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks)
            .map(|(name, path, full_path)| async move {
                let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs).await?;
                let entry = Entry::new()
                    .name(CowStr::from(name))
                    .node(EntryNode::File(Box::new(file_node)))
                    .build();
                Ok::<_, miette::Report>((entry, reused))
            })
            .buffer_unordered(5)
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .collect::<miette::Result<Vec<_>>>()?;

        let mut file_entries = Vec::new();
        let mut reused_count = 0;
        let mut total_files = 0;

        for (entry, reused) in file_results {
            file_entries.push(entry);
            total_files += 1;
            if reused {
                reused_count += 1;
            }
        }

        // Process directories recursively (sequentially to avoid too much nesting)
        let mut dir_entries = Vec::new();
        for (name, path) in dir_tasks {
            // Construct full path for subdirectory
            let subdir_path = if current_path.is_empty() {
                name.clone()
            } else {
                format!("{}/{}", current_path, name)
            };
            let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path).await?;
            dir_entries.push(Entry::new()
                .name(CowStr::from(name))
                .node(EntryNode::Directory(Box::new(subdir)))
                .build());
            total_files += sub_total;
            reused_count += sub_reused;
        }

        // Combine file and directory entries
        let mut entries = file_entries;
        entries.extend(dir_entries);

        let directory = Directory::new()
            .r#type(CowStr::from("directory"))
            .entries(entries)
            .build();

        Ok((directory, total_files, reused_count))
    })
}

/// Process a single file: gzip -> base64 -> upload blob (or reuse existing)
/// Returns (File, reused: bool)
/// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup
///
/// Special handling: _redirects files are NOT compressed (uploaded as-is)
async fn process_file(
    agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
    file_path: &Path,
    file_path_key: &str,
    existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
) -> miette::Result<(File<'static>, bool)>
{
    // Read file
    let file_data = std::fs::read(file_path).into_diagnostic()?;

    // Detect original MIME type
    let original_mime = mime_guess::from_path(file_path)
        .first_or_octet_stream()
        .to_string();

    // Check if this is a _redirects file (don't compress it)
    let is_redirects_file = file_path.file_name()
        .and_then(|n| n.to_str())
        .map(|n| n == "_redirects")
        .unwrap_or(false);

    let (upload_bytes, encoding, is_base64) = if is_redirects_file {
        // Don't compress _redirects - upload as-is
        (file_data.clone(), None, false)
    } else {
        // Gzip compress
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&file_data).into_diagnostic()?;
        let gzipped = encoder.finish().into_diagnostic()?;

        // Base64 encode the gzipped data
        let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
        (base64_bytes, Some("gzip"), true)
    };

    // Compute CID for this file
    let file_cid = cid::compute_cid(&upload_bytes);

    // Check if we have an existing blob with the same CID
    let existing_blob = existing_blobs.get(file_path_key);

    if let Some((existing_blob_ref, existing_cid)) = existing_blob {
        if existing_cid == &file_cid {
            // CIDs match - reuse existing blob
            println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid);
            let mut file_builder = File::new()
                .r#type(CowStr::from("file"))
                .blob(existing_blob_ref.clone())
                .mime_type(CowStr::from(original_mime));

            if let Some(enc) = encoding {
                file_builder = file_builder.encoding(CowStr::from(enc));
            }
            if is_base64 {
                file_builder = file_builder.base64(true);
            }

            return Ok((file_builder.build(), true));
        } else {
            // CID mismatch - file changed
            println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid);
        }
    } else {
        // File not in existing blob map
        if file_path_key.starts_with("imgs/") {
            println!(" → New file (not in blob map): {}", file_path_key);
        }
    }

    // File is new or changed - upload it
    let mime_type = if is_redirects_file {
        MimeType::new_static("text/plain")
    } else {
        MimeType::new_static("application/octet-stream")
    };

    println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, upload_bytes.len(), file_cid);
    let blob = agent.upload_blob(upload_bytes, mime_type).await?;

    let mut file_builder = File::new()
        .r#type(CowStr::from("file"))
        .blob(blob)
        .mime_type(CowStr::from(original_mime));

    if let Some(enc) = encoding {
        file_builder = file_builder.encoding(CowStr::from(enc));
    }
    if is_base64 {
        file_builder = file_builder.base64(true);
    }

    Ok((file_builder.build(), false))
}
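
// Illustrative shape of the File node produced above (field names follow the
// builder calls; the exact serialization depends on the place.wisp.fs lexicon):
//   index.html -> File { type: "file", blob: <ref>, mime_type: "text/html", encoding: "gzip", base64: true }
//   _redirects -> File { type: "file", blob: <ref>, mime_type: <guessed> }  // uploaded uncompressed as text/plain
// The blob itself stores base64(gzip(bytes)) as application/octet-stream; the
// original MIME type is kept in the record so the file can be served correctly.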

/// Convert fs::Directory to subfs::Directory
/// They have the same structure, but different types
fn convert_fs_dir_to_subfs_dir(fs_dir: place_wisp::fs::Directory<'static>) -> place_wisp::subfs::Directory<'static> {
    use place_wisp::subfs::{Directory as SubfsDirectory, Entry as SubfsEntry, EntryNode as SubfsEntryNode, File as SubfsFile};

    let subfs_entries: Vec<SubfsEntry> = fs_dir.entries.into_iter().map(|entry| {
        let node = match entry.node {
            place_wisp::fs::EntryNode::File(file) => {
                SubfsEntryNode::File(Box::new(SubfsFile::new()
                    .r#type(file.r#type)
                    .blob(file.blob)
                    .encoding(file.encoding)
                    .mime_type(file.mime_type)
                    .base64(file.base64)
                    .build()))
            }
            place_wisp::fs::EntryNode::Directory(dir) => {
                SubfsEntryNode::Directory(Box::new(convert_fs_dir_to_subfs_dir(*dir)))
            }
            place_wisp::fs::EntryNode::Subfs(subfs) => {
                // Nested subfs in the directory we're converting
                // Note: subfs::Subfs doesn't have the 'flat' field - that's only in fs::Subfs
                SubfsEntryNode::Subfs(Box::new(place_wisp::subfs::Subfs::new()
                    .r#type(subfs.r#type)
                    .subject(subfs.subject)
                    .build()))
            }
            place_wisp::fs::EntryNode::Unknown(unknown) => {
                SubfsEntryNode::Unknown(unknown)
            }
        };

        SubfsEntry::new()
            .name(entry.name)
            .node(node)
            .build()
    }).collect();

    SubfsDirectory::new()
        .r#type(fs_dir.r#type)
        .entries(subfs_entries)
        .build()
}