Monorepo for wisp.place — a static site hosting service built on top of the AT Protocol.
wisp.place
1mod builder_types;
2mod place_wisp;
3mod cid;
4mod blob_map;
5mod metadata;
6mod download;
7mod pull;
8mod serve;
9mod subfs_utils;
10mod redirects;
11mod ignore_patterns;
12
13use clap::{Parser, Subcommand};
14use jacquard::CowStr;
15use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
16use jacquard::oauth::client::OAuthClient;
17use jacquard::oauth::loopback::LoopbackConfig;
18use jacquard::prelude::IdentityResolver;
19use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
20use jacquard_common::types::blob::MimeType;
21use miette::IntoDiagnostic;
22use std::path::{Path, PathBuf};
23use std::collections::HashMap;
24use flate2::Compression;
25use flate2::write::GzEncoder;
26use std::io::Write;
27use base64::Engine;
28use futures::stream::{self, StreamExt};
29use indicatif::{ProgressBar, ProgressStyle, MultiProgress};
30
31use place_wisp::fs::*;
32use place_wisp::settings::*;
33
/// Maximum number of concurrent file uploads to the PDS.
/// Used as the buffer width of the upload stream in `build_directory`.
/// NOTE(review): kept low, presumably to avoid PDS rate limiting — confirm before raising.
const MAX_CONCURRENT_UPLOADS: usize = 2;
36
/// Command-line arguments for the wisp.place CLI.
///
/// If no subcommand is given, the global arguments below are used as a
/// legacy-style deploy invocation (handled in `main`'s `None` branch).
/// Each global argument conflicts with `command` so the two invocation
/// styles cannot be mixed.
#[derive(Parser, Debug)]
#[command(author, version, about = "wisp.place CLI tool")]
struct Args {
    #[command(subcommand)]
    command: Option<Commands>,

    // Deploy arguments (when no subcommand is specified)
    /// Handle (e.g., alice.bsky.social), DID, or PDS URL
    #[arg(global = true, conflicts_with = "command")]
    input: Option<CowStr<'static>>,

    /// Path to the directory containing your static site
    #[arg(short, long, global = true, conflicts_with = "command")]
    path: Option<PathBuf>,

    /// Site name (defaults to directory name)
    #[arg(short, long, global = true, conflicts_with = "command")]
    site: Option<String>,

    /// Path to auth store file
    #[arg(long, global = true, conflicts_with = "command")]
    store: Option<String>,

    /// App Password for authentication
    #[arg(long, global = true, conflicts_with = "command")]
    password: Option<CowStr<'static>>,

    /// Enable directory listing mode for paths without index files
    #[arg(long, global = true, conflicts_with = "command")]
    directory: bool,

    /// Enable SPA mode (serve index.html for all routes)
    #[arg(long, global = true, conflicts_with = "command")]
    spa: bool,
}
72
/// Subcommands supported by the wisp.place CLI.
#[derive(Subcommand, Debug)]
enum Commands {
    /// Deploy a static site to wisp.place (default command)
    Deploy {
        /// Handle (e.g., alice.bsky.social), DID, or PDS URL
        input: CowStr<'static>,

        /// Path to the directory containing your static site
        #[arg(short, long, default_value = ".")]
        path: PathBuf,

        /// Site name (defaults to directory name)
        #[arg(short, long)]
        site: Option<String>,

        /// Path to auth store file (will be created if missing, only used with OAuth)
        #[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
        store: String,

        /// App Password for authentication (alternative to OAuth)
        // When present, the app-password flow is used and `store` is ignored.
        #[arg(long)]
        password: Option<CowStr<'static>>,

        /// Enable directory listing mode for paths without index files
        #[arg(long)]
        directory: bool,

        /// Enable SPA mode (serve index.html for all routes)
        #[arg(long)]
        spa: bool,
    },
    /// Pull a site from the PDS to a local directory
    Pull {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        // The record key of the place.wisp.fs record to fetch.
        #[arg(short, long)]
        site: String,

        /// Output directory for the downloaded site
        #[arg(short, long, default_value = ".")]
        output: PathBuf,
    },
    /// Serve a site locally with real-time firehose updates
    Serve {
        /// Handle (e.g., alice.bsky.social) or DID
        input: CowStr<'static>,

        /// Site name (record key)
        #[arg(short, long)]
        site: String,

        /// Output directory for the site files
        #[arg(short, long, default_value = ".")]
        output: PathBuf,

        /// Port to serve on
        #[arg(short, long, default_value = "8080")]
        port: u16,
    },
}
135
136#[tokio::main]
137async fn main() -> miette::Result<()> {
138 let args = Args::parse();
139
140 let result = match args.command {
141 Some(Commands::Deploy { input, path, site, store, password, directory, spa }) => {
142 // Dispatch to appropriate authentication method
143 if let Some(password) = password {
144 run_with_app_password(input, password, path, site, directory, spa).await
145 } else {
146 run_with_oauth(input, store, path, site, directory, spa).await
147 }
148 }
149 Some(Commands::Pull { input, site, output }) => {
150 pull::pull_site(input, CowStr::from(site), output).await
151 }
152 Some(Commands::Serve { input, site, output, port }) => {
153 serve::serve_site(input, CowStr::from(site), output, port).await
154 }
155 None => {
156 // Legacy mode: if input is provided, assume deploy command
157 if let Some(input) = args.input {
158 let path = args.path.unwrap_or_else(|| PathBuf::from("."));
159 let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string());
160
161 // Dispatch to appropriate authentication method
162 if let Some(password) = args.password {
163 run_with_app_password(input, password, path, args.site, args.directory, args.spa).await
164 } else {
165 run_with_oauth(input, store, path, args.site, args.directory, args.spa).await
166 }
167 } else {
168 // No command and no input, show help
169 use clap::CommandFactory;
170 Args::command().print_help().into_diagnostic()?;
171 Ok(())
172 }
173 }
174 };
175
176 // Force exit to avoid hanging on background tasks/connections
177 match result {
178 Ok(_) => std::process::exit(0),
179 Err(e) => {
180 eprintln!("{:?}", e);
181 std::process::exit(1)
182 }
183 }
184}
185
/// Run deployment with app password authentication.
///
/// Authenticates an in-memory credential session from the given handle/DID
/// and app password, prints the resolved handle, and hands the resulting
/// agent to [`deploy_site`]. No credentials are persisted to disk.
async fn run_with_app_password(
    input: CowStr<'static>,
    password: CowStr<'static>,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    // Direct credential login — no OAuth/browser flow involved.
    let (session, auth) =
        MemoryCredentialSession::authenticated(input, password, None, None).await?;
    println!("Signed in as {}", auth.handle);

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}
202
/// Run deployment with OAuth authentication.
///
/// Spins up a local loopback server to complete the OAuth flow in the
/// user's browser, persisting the session to `store` so later runs can
/// reuse it, then hands the agent to [`deploy_site`].
async fn run_with_oauth(
    input: CowStr<'static>,
    store: String,
    path: PathBuf,
    site: Option<String>,
    directory: bool,
    spa: bool,
) -> miette::Result<()> {
    use jacquard::oauth::scopes::Scope;
    use jacquard::oauth::atproto::AtprotoClientMetadata;
    use jacquard::oauth::session::ClientData;
    use url::Url;

    // Request the necessary scopes for wisp.place (including settings)
    let scopes = Scope::parse_multiple("atproto repo:place.wisp.fs repo:place.wisp.subfs repo:place.wisp.settings blob:*/*")
        .map_err(|e| miette::miette!("Failed to parse scopes: {:?}", e))?;

    // Create redirect URIs that match the loopback server (port 4000, path /oauth/callback)
    // Both IPv4 and IPv6 loopback forms are registered so either resolution works.
    let redirect_uris = vec![
        Url::parse("http://127.0.0.1:4000/oauth/callback").into_diagnostic()?,
        Url::parse("http://[::1]:4000/oauth/callback").into_diagnostic()?,
    ];

    // Create client metadata with matching redirect URIs and scopes
    let client_data = ClientData {
        keyset: None,
        config: AtprotoClientMetadata::new_localhost(
            Some(redirect_uris),
            Some(scopes),
        ),
    };

    // Session tokens are persisted to `store` via FileAuthStore.
    let oauth = OAuthClient::new(FileAuthStore::new(&store), client_data);

    // Opens the browser and blocks until the loopback callback completes.
    let session = oauth
        .login_with_local_server(input, Default::default(), LoopbackConfig::default())
        .await?;

    let agent: Agent<_> = Agent::from(session);
    deploy_site(&agent, path, site, directory, spa).await
}
245
246/// Deploy the site using the provided agent
247async fn deploy_site(
248 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
249 path: PathBuf,
250 site: Option<String>,
251 directory_listing: bool,
252 spa_mode: bool,
253) -> miette::Result<()> {
254 // Verify the path exists
255 if !path.exists() {
256 return Err(miette::miette!("Path does not exist: {}", path.display()));
257 }
258
259 // Get site name
260 let site_name = site.unwrap_or_else(|| {
261 path
262 .file_name()
263 .and_then(|n| n.to_str())
264 .unwrap_or("site")
265 .to_string()
266 });
267
268 println!("Deploying site '{}'...", site_name);
269
270 // Try to fetch existing manifest for incremental updates
271 let (existing_blob_map, old_subfs_uris): (HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, Vec<(String, String)>) = {
272 use jacquard_common::types::string::AtUri;
273
274 // Get the DID for this session
275 let session_info = agent.session_info().await;
276 if let Some((did, _)) = session_info {
277 // Construct the AT URI for the record
278 let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name);
279 if let Ok(uri) = AtUri::new(&uri_string) {
280 match agent.get_record::<Fs>(&uri).await {
281 Ok(response) => {
282 match response.into_output() {
283 Ok(record_output) => {
284 let existing_manifest = record_output.value;
285 let mut blob_map = blob_map::extract_blob_map(&existing_manifest.root);
286 println!("Found existing manifest with {} files in main record", blob_map.len());
287
288 // Extract subfs URIs from main record
289 let subfs_uris = subfs_utils::extract_subfs_uris(&existing_manifest.root, String::new());
290
291 if !subfs_uris.is_empty() {
292 println!("Found {} subfs records, fetching for blob reuse...", subfs_uris.len());
293
294 // Merge blob maps from all subfs records
295 match subfs_utils::merge_subfs_blob_maps(agent, subfs_uris.clone(), &mut blob_map).await {
296 Ok(merged_count) => {
297 println!("Total blob map: {} files (main + {} from subfs)", blob_map.len(), merged_count);
298 }
299 Err(e) => {
300 eprintln!("⚠️ Failed to merge some subfs blob maps: {}", e);
301 }
302 }
303
304 (blob_map, subfs_uris)
305 } else {
306 (blob_map, Vec::new())
307 }
308 }
309 Err(_) => {
310 println!("No existing manifest found, uploading all files...");
311 (HashMap::new(), Vec::new())
312 }
313 }
314 }
315 Err(_) => {
316 // Record doesn't exist yet - this is a new site
317 println!("No existing manifest found, uploading all files...");
318 (HashMap::new(), Vec::new())
319 }
320 }
321 } else {
322 println!("No existing manifest found (invalid URI), uploading all files...");
323 (HashMap::new(), Vec::new())
324 }
325 } else {
326 println!("No existing manifest found (could not get DID), uploading all files...");
327 (HashMap::new(), Vec::new())
328 }
329 };
330
331 // Build directory tree with ignore patterns
332 let ignore_matcher = ignore_patterns::IgnoreMatcher::new(&path)?;
333
334 // Create progress tracking (spinner style since we don't know total count upfront)
335 let multi_progress = MultiProgress::new();
336 let progress = multi_progress.add(ProgressBar::new_spinner());
337 progress.set_style(
338 ProgressStyle::default_spinner()
339 .template("[{elapsed_precise}] {spinner:.cyan} {pos} files {msg}")
340 .into_diagnostic()?
341 .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ")
342 );
343 progress.set_message("Scanning files...");
344 progress.enable_steady_tick(std::time::Duration::from_millis(100));
345
346 let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new(), &ignore_matcher, &progress).await?;
347 let uploaded_count = total_files - reused_count;
348
349 progress.finish_with_message(format!("✓ {} files ({} uploaded, {} reused)", total_files, uploaded_count, reused_count));
350
351 // Check if we need to split into subfs records
352 const MAX_MANIFEST_SIZE: usize = 140 * 1024; // 140KB (PDS limit is 150KB)
353 const FILE_COUNT_THRESHOLD: usize = 250; // Start splitting at this many files
354 const TARGET_FILE_COUNT: usize = 200; // Keep main manifest under this
355
356 let mut working_directory = root_dir;
357 let mut current_file_count = total_files;
358 let mut new_subfs_uris: Vec<(String, String)> = Vec::new();
359
360 // Estimate initial manifest size
361 let mut manifest_size = subfs_utils::estimate_directory_size(&working_directory);
362
363 if total_files >= FILE_COUNT_THRESHOLD || manifest_size > MAX_MANIFEST_SIZE {
364 println!("\n⚠️ Large site detected ({} files, {:.1}KB manifest), splitting into subfs records...",
365 total_files, manifest_size as f64 / 1024.0);
366
367 let mut attempts = 0;
368 const MAX_SPLIT_ATTEMPTS: usize = 50;
369
370 while (manifest_size > MAX_MANIFEST_SIZE || current_file_count > TARGET_FILE_COUNT) && attempts < MAX_SPLIT_ATTEMPTS {
371 attempts += 1;
372
373 // Find large directories to split
374 let directories = subfs_utils::find_large_directories(&working_directory, String::new());
375
376 if let Some(largest_dir) = directories.first() {
377 println!(" Split #{}: {} ({} files, {:.1}KB)",
378 attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0);
379
380 // Check if this directory is itself too large for a single subfs record
381 const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety
382 let mut subfs_uri = String::new();
383
384 if largest_dir.size > MAX_SUBFS_SIZE {
385 // Need to split this directory into multiple chunks
386 println!(" → Directory too large, splitting into chunks...");
387 let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE);
388 println!(" → Created {} chunks", chunks.len());
389
390 // Upload each chunk as a subfs record
391 let mut chunk_uris = Vec::new();
392 for (i, chunk) in chunks.iter().enumerate() {
393 use jacquard_common::types::string::Tid;
394 let chunk_tid = Tid::now_0();
395 let chunk_rkey = chunk_tid.to_string();
396
397 let chunk_file_count = subfs_utils::count_files_in_directory(chunk);
398 let chunk_size = subfs_utils::estimate_directory_size(chunk);
399
400 let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new()
401 .root(convert_fs_dir_to_subfs_dir(chunk.clone()))
402 .file_count(Some(chunk_file_count as i64))
403 .created_at(Datetime::now())
404 .build();
405
406 println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...",
407 i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0);
408
409 let chunk_output = agent.put_record(
410 RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?),
411 chunk_manifest
412 ).await.into_diagnostic()?;
413
414 let chunk_uri = chunk_output.uri.to_string();
415 chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
416 new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
417 }
418
419 // Create a parent subfs record that references all chunks
420 // Each chunk reference MUST have flat: true to merge chunk contents
421 println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len());
422 use jacquard_common::CowStr;
423 use crate::place_wisp::fs::{Subfs};
424
425 // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs
426 let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| {
427 let uri_string = uri.clone();
428 let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI");
429 Entry::new()
430 .name(CowStr::from(format!("chunk{}", i)))
431 .node(EntryNode::Subfs(Box::new(
432 Subfs::new()
433 .r#type(CowStr::from("subfs"))
434 .subject(at_uri)
435 .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents
436 .build()
437 )))
438 .build()
439 }).collect();
440
441 let parent_root_fs = Directory::new()
442 .r#type(CowStr::from("directory"))
443 .entries(parent_entries_fs)
444 .build();
445
446 // Convert to subfs::Directory for the parent subfs record
447 let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs);
448
449 use jacquard_common::types::string::Tid;
450 let parent_tid = Tid::now_0();
451 let parent_rkey = parent_tid.to_string();
452
453 let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new()
454 .root(parent_root_subfs)
455 .file_count(Some(largest_dir.file_count as i64))
456 .created_at(Datetime::now())
457 .build();
458
459 let parent_output = agent.put_record(
460 RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?),
461 parent_manifest
462 ).await.into_diagnostic()?;
463
464 subfs_uri = parent_output.uri.to_string();
465 println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri);
466 } else {
467 // Directory fits in a single subfs record
468 use jacquard_common::types::string::Tid;
469 let subfs_tid = Tid::now_0();
470 let subfs_rkey = subfs_tid.to_string();
471
472 let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
473 .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
474 .file_count(Some(largest_dir.file_count as i64))
475 .created_at(Datetime::now())
476 .build();
477
478 // Upload subfs record
479 let subfs_output = agent.put_record(
480 RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
481 subfs_manifest
482 ).await.into_diagnostic()?;
483
484 subfs_uri = subfs_output.uri.to_string();
485 println!(" ✅ Created subfs: {}", subfs_uri);
486 }
487
488 // Replace directory with subfs node (flat: false to preserve directory structure)
489 working_directory = subfs_utils::replace_directory_with_subfs(
490 working_directory,
491 &largest_dir.path,
492 &subfs_uri,
493 false // Preserve directory - the chunks inside have flat=true
494 )?;
495
496 new_subfs_uris.push((subfs_uri, largest_dir.path.clone()));
497 current_file_count -= largest_dir.file_count;
498
499 // Recalculate manifest size
500 manifest_size = subfs_utils::estimate_directory_size(&working_directory);
501 println!(" → Manifest now {:.1}KB with {} files ({} subfs total)",
502 manifest_size as f64 / 1024.0, current_file_count, new_subfs_uris.len());
503
504 if manifest_size <= MAX_MANIFEST_SIZE && current_file_count <= TARGET_FILE_COUNT {
505 println!("✅ Manifest now fits within limits");
506 break;
507 }
508 } else {
509 println!(" No more subdirectories to split - stopping");
510 break;
511 }
512 }
513
514 if attempts >= MAX_SPLIT_ATTEMPTS {
515 return Err(miette::miette!(
516 "Exceeded maximum split attempts ({}). Manifest still too large: {:.1}KB with {} files",
517 MAX_SPLIT_ATTEMPTS,
518 manifest_size as f64 / 1024.0,
519 current_file_count
520 ));
521 }
522
523 println!("✅ Split complete: {} subfs records, {} files in main manifest, {:.1}KB",
524 new_subfs_uris.len(), current_file_count, manifest_size as f64 / 1024.0);
525 } else {
526 println!("Manifest created ({} files, {:.1}KB) - no splitting needed",
527 total_files, manifest_size as f64 / 1024.0);
528 }
529
530 // Create the final Fs record
531 let fs_record = Fs::new()
532 .site(CowStr::from(site_name.clone()))
533 .root(working_directory)
534 .file_count(current_file_count as i64)
535 .created_at(Datetime::now())
536 .build();
537
538 // Use site name as the record key
539 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
540 let output = agent.put_record(RecordKey::from(rkey), fs_record).await?;
541
542 // Extract DID from the AT URI (format: at://did:plc:xxx/collection/rkey)
543 let uri_str = output.uri.to_string();
544 let did = uri_str
545 .strip_prefix("at://")
546 .and_then(|s| s.split('/').next())
547 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?;
548
549 println!("\n✓ Deployed site '{}': {}", site_name, output.uri);
550 println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count);
551 println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name);
552
553 // Clean up old subfs records
554 if !old_subfs_uris.is_empty() {
555 println!("\nCleaning up {} old subfs records...", old_subfs_uris.len());
556
557 let mut deleted_count = 0;
558 let mut failed_count = 0;
559
560 for (uri, _path) in old_subfs_uris {
561 match subfs_utils::delete_subfs_record(agent, &uri).await {
562 Ok(_) => {
563 deleted_count += 1;
564 println!(" 🗑️ Deleted old subfs: {}", uri);
565 }
566 Err(e) => {
567 failed_count += 1;
568 eprintln!(" ⚠️ Failed to delete {}: {}", uri, e);
569 }
570 }
571 }
572
573 if failed_count > 0 {
574 eprintln!("⚠️ Cleanup completed with {} deleted, {} failed", deleted_count, failed_count);
575 } else {
576 println!("✅ Cleanup complete: {} old subfs records deleted", deleted_count);
577 }
578 }
579
580 // Upload settings if either flag is set
581 if directory_listing || spa_mode {
582 // Validate mutual exclusivity
583 if directory_listing && spa_mode {
584 return Err(miette::miette!("Cannot enable both --directory and --SPA modes"));
585 }
586
587 println!("\n⚙️ Uploading site settings...");
588
589 // Build settings record
590 let mut settings_builder = Settings::new();
591
592 if directory_listing {
593 settings_builder = settings_builder.directory_listing(Some(true));
594 println!(" • Directory listing: enabled");
595 }
596
597 if spa_mode {
598 settings_builder = settings_builder.spa_mode(Some(CowStr::from("index.html")));
599 println!(" • SPA mode: enabled (serving index.html for all routes)");
600 }
601
602 let settings_record = settings_builder.build();
603
604 // Upload settings record with same rkey as site
605 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?;
606 match agent.put_record(RecordKey::from(rkey), settings_record).await {
607 Ok(settings_output) => {
608 println!("✅ Settings uploaded: {}", settings_output.uri);
609 }
610 Err(e) => {
611 eprintln!("⚠️ Failed to upload settings: {}", e);
612 eprintln!(" Site was deployed successfully, but settings may need to be configured manually.");
613 }
614 }
615 }
616
617 Ok(())
618}
619
620/// Recursively build a Directory from a filesystem path
621/// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir)
622fn build_directory<'a>(
623 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>,
624 dir_path: &'a Path,
625 existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
626 current_path: String,
627 ignore_matcher: &'a ignore_patterns::IgnoreMatcher,
628 progress: &'a ProgressBar,
629) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>>
630{
631 Box::pin(async move {
632 // Collect all directory entries first
633 let dir_entries: Vec<_> = std::fs::read_dir(dir_path)
634 .into_diagnostic()?
635 .collect::<Result<Vec<_>, _>>()
636 .into_diagnostic()?;
637
638 // Separate files and directories
639 let mut file_tasks = Vec::new();
640 let mut dir_tasks = Vec::new();
641
642 for entry in dir_entries {
643 let path = entry.path();
644 let name = entry.file_name();
645 let name_str = name.to_str()
646 .ok_or_else(|| miette::miette!("Invalid filename: {:?}", name))?
647 .to_string();
648
649 // Construct full path for ignore checking
650 let full_path = if current_path.is_empty() {
651 name_str.clone()
652 } else {
653 format!("{}/{}", current_path, name_str)
654 };
655
656 // Skip files/directories that match ignore patterns
657 if ignore_matcher.is_ignored(&full_path) || ignore_matcher.is_filename_ignored(&name_str) {
658 continue;
659 }
660
661 let metadata = entry.metadata().into_diagnostic()?;
662
663 if metadata.is_file() {
664 // Construct full path for this file (for blob map lookup)
665 let full_path = if current_path.is_empty() {
666 name_str.clone()
667 } else {
668 format!("{}/{}", current_path, name_str)
669 };
670 file_tasks.push((name_str, path, full_path));
671 } else if metadata.is_dir() {
672 dir_tasks.push((name_str, path));
673 }
674 }
675
676 // Process files concurrently with a limit of 2
677 let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks)
678 .map(|(name, path, full_path)| async move {
679 let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs, progress).await?;
680 progress.inc(1);
681 let entry = Entry::new()
682 .name(CowStr::from(name))
683 .node(EntryNode::File(Box::new(file_node)))
684 .build();
685 Ok::<_, miette::Report>((entry, reused))
686 })
687 .buffer_unordered(MAX_CONCURRENT_UPLOADS)
688 .collect::<Vec<_>>()
689 .await
690 .into_iter()
691 .collect::<miette::Result<Vec<_>>>()?;
692
693 let mut file_entries = Vec::new();
694 let mut reused_count = 0;
695 let mut total_files = 0;
696
697 for (entry, reused) in file_results {
698 file_entries.push(entry);
699 total_files += 1;
700 if reused {
701 reused_count += 1;
702 }
703 }
704
705 // Process directories recursively (sequentially to avoid too much nesting)
706 let mut dir_entries = Vec::new();
707 for (name, path) in dir_tasks {
708 // Construct full path for subdirectory
709 let subdir_path = if current_path.is_empty() {
710 name.clone()
711 } else {
712 format!("{}/{}", current_path, name)
713 };
714 let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path, ignore_matcher, progress).await?;
715 dir_entries.push(Entry::new()
716 .name(CowStr::from(name))
717 .node(EntryNode::Directory(Box::new(subdir)))
718 .build());
719 total_files += sub_total;
720 reused_count += sub_reused;
721 }
722
723 // Combine file and directory entries
724 let mut entries = file_entries;
725 entries.extend(dir_entries);
726
727 let directory = Directory::new()
728 .r#type(CowStr::from("directory"))
729 .entries(entries)
730 .build();
731
732 Ok((directory, total_files, reused_count))
733 })
734}
735
736/// Process a single file: gzip -> base64 -> upload blob (or reuse existing)
737/// Returns (File, reused: bool)
738/// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup
739///
740/// Special handling: _redirects files are NOT compressed (uploaded as-is)
741async fn process_file(
742 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
743 file_path: &Path,
744 file_path_key: &str,
745 existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
746 progress: &ProgressBar,
747) -> miette::Result<(File<'static>, bool)>
748{
749 // Read file
750 let file_data = std::fs::read(file_path).into_diagnostic()?;
751
752 // Detect original MIME type
753 let original_mime = mime_guess::from_path(file_path)
754 .first_or_octet_stream()
755 .to_string();
756
757 // Check if this is a _redirects file (don't compress it)
758 let is_redirects_file = file_path.file_name()
759 .and_then(|n| n.to_str())
760 .map(|n| n == "_redirects")
761 .unwrap_or(false);
762
763 let (upload_bytes, encoding, is_base64) = if is_redirects_file {
764 // Don't compress _redirects - upload as-is
765 (file_data.clone(), None, false)
766 } else {
767 // Gzip compress
768 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
769 encoder.write_all(&file_data).into_diagnostic()?;
770 let gzipped = encoder.finish().into_diagnostic()?;
771
772 // Base64 encode the gzipped data
773 let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
774 (base64_bytes, Some("gzip"), true)
775 };
776
777 // Compute CID for this file
778 let file_cid = cid::compute_cid(&upload_bytes);
779
780 // Check if we have an existing blob with the same CID
781 let existing_blob = existing_blobs.get(file_path_key);
782
783 if let Some((existing_blob_ref, existing_cid)) = existing_blob {
784 if existing_cid == &file_cid {
785 // CIDs match - reuse existing blob
786 progress.set_message(format!("✓ Reused {}", file_path_key));
787 let mut file_builder = File::new()
788 .r#type(CowStr::from("file"))
789 .blob(existing_blob_ref.clone())
790 .mime_type(CowStr::from(original_mime));
791
792 if let Some(enc) = encoding {
793 file_builder = file_builder.encoding(CowStr::from(enc));
794 }
795 if is_base64 {
796 file_builder = file_builder.base64(true);
797 }
798
799 return Ok((file_builder.build(), true));
800 }
801 }
802
803 // File is new or changed - upload it
804 let mime_type = if is_redirects_file {
805 MimeType::new_static("text/plain")
806 } else {
807 MimeType::new_static("application/octet-stream")
808 };
809
810 // Format file size nicely
811 let size_str = if upload_bytes.len() < 1024 {
812 format!("{} B", upload_bytes.len())
813 } else if upload_bytes.len() < 1024 * 1024 {
814 format!("{:.1} KB", upload_bytes.len() as f64 / 1024.0)
815 } else {
816 format!("{:.1} MB", upload_bytes.len() as f64 / (1024.0 * 1024.0))
817 };
818
819 progress.set_message(format!("↑ Uploading {} ({})", file_path_key, size_str));
820 let blob = agent.upload_blob(upload_bytes, mime_type).await?;
821 progress.set_message(format!("✓ Uploaded {}", file_path_key));
822
823 let mut file_builder = File::new()
824 .r#type(CowStr::from("file"))
825 .blob(blob)
826 .mime_type(CowStr::from(original_mime));
827
828 if let Some(enc) = encoding {
829 file_builder = file_builder.encoding(CowStr::from(enc));
830 }
831 if is_base64 {
832 file_builder = file_builder.base64(true);
833 }
834
835 Ok((file_builder.build(), false))
836}
837
838/// Convert fs::Directory to subfs::Directory
839/// They have the same structure, but different types
840fn convert_fs_dir_to_subfs_dir(fs_dir: place_wisp::fs::Directory<'static>) -> place_wisp::subfs::Directory<'static> {
841 use place_wisp::subfs::{Directory as SubfsDirectory, Entry as SubfsEntry, EntryNode as SubfsEntryNode, File as SubfsFile};
842
843 let subfs_entries: Vec<SubfsEntry> = fs_dir.entries.into_iter().map(|entry| {
844 let node = match entry.node {
845 place_wisp::fs::EntryNode::File(file) => {
846 SubfsEntryNode::File(Box::new(SubfsFile::new()
847 .r#type(file.r#type)
848 .blob(file.blob)
849 .encoding(file.encoding)
850 .mime_type(file.mime_type)
851 .base64(file.base64)
852 .build()))
853 }
854 place_wisp::fs::EntryNode::Directory(dir) => {
855 SubfsEntryNode::Directory(Box::new(convert_fs_dir_to_subfs_dir(*dir)))
856 }
857 place_wisp::fs::EntryNode::Subfs(subfs) => {
858 // Nested subfs in the directory we're converting
859 // Note: subfs::Subfs doesn't have the 'flat' field - that's only in fs::Subfs
860 SubfsEntryNode::Subfs(Box::new(place_wisp::subfs::Subfs::new()
861 .r#type(subfs.r#type)
862 .subject(subfs.subject)
863 .build()))
864 }
865 place_wisp::fs::EntryNode::Unknown(unknown) => {
866 SubfsEntryNode::Unknown(unknown)
867 }
868 };
869
870 SubfsEntry::new()
871 .name(entry.name)
872 .node(node)
873 .build()
874 }).collect();
875
876 SubfsDirectory::new()
877 .r#type(fs_dir.r#type)
878 .entries(subfs_entries)
879 .build()
880}
881