Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

fix subfs nesting
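
A directory too large for a single place.wisp.subfs record (over the 75KB soft
limit) is now split into chunk records. A parent subfs record references each
chunk (chunk0, chunk1, …) with flat: true, so chunk contents are merged back
into one directory on expansion, while the parent itself is still mounted with
flat: false to preserve the directory's position in the tree. pull and blob-map
merging now resolve nested subfs records recursively (capped at 10 iterations)
and skip chunk-only parent records when merging blobs.

Roughly, the record graph after a split looks like this (illustrative sketch;
"imgs" stands in for any oversized directory, <parent>/<tid0>/<tid1> for TIDs):

  place.wisp.fs (main record)
  └── imgs → subfs { subject: at://…/place.wisp.subfs/<parent>, flat: false }
      parent record entries:
      ├── chunk0 → subfs { subject: at://…/place.wisp.subfs/<tid0>, flat: true }
      └── chunk1 → subfs { subject: at://…/place.wisp.subfs/<tid1>, flat: true }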

nekomimi.pet e3f99bc1 c544254d

verified
Changed files: +340 -85

cli/src/main.rs  +115 -19
···
use jacquard::oauth::client::OAuthClient;
use jacquard::oauth::loopback::LoopbackConfig;
use jacquard::prelude::IdentityResolver;
- use jacquard_common::types::string::{Datetime, Rkey, RecordKey};
+ use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
use jacquard_common::types::blob::MimeType;
use miette::IntoDiagnostic;
use std::path::{Path, PathBuf};
···
println!(" Split #{}: {} ({} files, {:.1}KB)",
    attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0);
- // Create a subfs record for this directory
- use jacquard_common::types::string::Tid;
- let subfs_tid = Tid::now_0();
- let subfs_rkey = subfs_tid.to_string();
+ // Check if this directory is itself too large for a single subfs record
+ const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety
+ let mut subfs_uri = String::new();
+
+ if largest_dir.size > MAX_SUBFS_SIZE {
+     // Need to split this directory into multiple chunks
+     println!(" → Directory too large, splitting into chunks...");
+     let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE);
+     println!(" → Created {} chunks", chunks.len());
+
+     // Upload each chunk as a subfs record
+     let mut chunk_uris = Vec::new();
+     for (i, chunk) in chunks.iter().enumerate() {
+         use jacquard_common::types::string::Tid;
+         let chunk_tid = Tid::now_0();
+         let chunk_rkey = chunk_tid.to_string();
+
+         let chunk_file_count = subfs_utils::count_files_in_directory(chunk);
+         let chunk_size = subfs_utils::estimate_directory_size(chunk);
- let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
-     .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
-     .file_count(Some(largest_dir.file_count as i64))
-     .created_at(Datetime::now())
-     .build();
+         let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+             .root(convert_fs_dir_to_subfs_dir(chunk.clone()))
+             .file_count(Some(chunk_file_count as i64))
+             .created_at(Datetime::now())
+             .build();
+
+         println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...",
+             i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0);
+
+         let chunk_output = agent.put_record(
+             RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?),
+             chunk_manifest
+         ).await.into_diagnostic()?;
- // Upload subfs record
- let subfs_output = agent.put_record(
-     RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
-     subfs_manifest
- ).await.into_diagnostic()?;
+         let chunk_uri = chunk_output.uri.to_string();
+         chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
+         new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
+     }
- let subfs_uri = subfs_output.uri.to_string();
- println!(" ✅ Created subfs: {}", subfs_uri);
+     // Create a parent subfs record that references all chunks
+     // Each chunk reference MUST have flat: true to merge chunk contents
+     println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len());
+     use jacquard_common::CowStr;
+     use crate::place_wisp::fs::{Subfs};
- // Replace directory with subfs node (flat: false to preserve structure)
+     // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs
+     let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| {
+         let uri_string = uri.clone();
+         let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI");
+         Entry::new()
+             .name(CowStr::from(format!("chunk{}", i)))
+             .node(EntryNode::Subfs(Box::new(
+                 Subfs::new()
+                     .r#type(CowStr::from("subfs"))
+                     .subject(at_uri)
+                     .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents
+                     .build()
+             )))
+             .build()
+     }).collect();
+
+     let parent_root_fs = Directory::new()
+         .r#type(CowStr::from("directory"))
+         .entries(parent_entries_fs)
+         .build();
+
+     // Convert to subfs::Directory for the parent subfs record
+     let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs);
+
+     use jacquard_common::types::string::Tid;
+     let parent_tid = Tid::now_0();
+     let parent_rkey = parent_tid.to_string();
+
+     let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+         .root(parent_root_subfs)
+         .file_count(Some(largest_dir.file_count as i64))
+         .created_at(Datetime::now())
+         .build();
+
+     let parent_output = agent.put_record(
+         RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?),
+         parent_manifest
+     ).await.into_diagnostic()?;
+
+     subfs_uri = parent_output.uri.to_string();
+     println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri);
+ } else {
+     // Directory fits in a single subfs record
+     use jacquard_common::types::string::Tid;
+     let subfs_tid = Tid::now_0();
+     let subfs_rkey = subfs_tid.to_string();
+
+     let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+         .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
+         .file_count(Some(largest_dir.file_count as i64))
+         .created_at(Datetime::now())
+         .build();
+
+     // Upload subfs record
+     let subfs_output = agent.put_record(
+         RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
+         subfs_manifest
+     ).await.into_diagnostic()?;
+
+     subfs_uri = subfs_output.uri.to_string();
+     println!(" ✅ Created subfs: {}", subfs_uri);
+ }
+
+ // Replace directory with subfs node (flat: false to preserve directory structure)
working_directory = subfs_utils::replace_directory_with_subfs(
    working_directory,
    &largest_dir.path,
    &subfs_uri,
-     false // Preserve directory structure
+     false // Preserve directory - the chunks inside have flat=true
)?;
new_subfs_uris.push((subfs_uri, largest_dir.path.clone()));
···
}
return Ok((file_builder.build(), true));
+ } else {
+     // CID mismatch - file changed
+     println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid);
+ }
+ } else {
+     // File not in existing blob map
+     if file_path_key.starts_with("imgs/") {
+         println!(" → New file (not in blob map): {}", file_path_key);
}
}
cli/src/pull.rs  +30 -32
···
let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?;
println!("Resolved PDS: {}", pds_url);
- // Fetch the place.wisp.fs record
-
+ // Create a temporary agent for fetching records (no auth needed for public reads)
println!("Fetching record from PDS...");
let client = reqwest::Client::new();
-
+ // Use com.atproto.repo.getRecord
use jacquard::api::com_atproto::repo::get_record::GetRecord;
use jacquard_common::types::string::Rkey as RkeyType;
let rkey_parsed = RkeyType::new(&rkey).into_diagnostic()?;
-
+ use jacquard_common::types::ident::AtIdentifier;
use jacquard_common::types::string::RecordKey;
let request = GetRecord::new()
···
println!("Found site '{}' with {} files (in main record)", fs_record.site, file_count);
// Check for and expand subfs nodes
- let expanded_root = expand_subfs_in_pull(&fs_record.root, &pds_url, did.as_str()).await?;
+ // Note: We use a custom expand function for pull since we don't have an Agent
+ let expanded_root = expand_subfs_in_pull_with_client(&fs_record.root, &client, &pds_url).await?;
let total_file_count = subfs_utils::count_files_in_directory(&expanded_root);
if total_file_count as i64 != fs_record.file_count.unwrap_or(0) {
···
}
/// Expand subfs nodes in a directory tree by fetching and merging subfs records (RECURSIVELY)
- async fn expand_subfs_in_pull<'a>(
+ /// Uses reqwest client directly for pull command (no agent needed)
+ async fn expand_subfs_in_pull_with_client<'a>(
    directory: &Directory<'a>,
+     client: &reqwest::Client,
    pds_url: &Url,
-     _did: &str,
) -> miette::Result<Directory<'static>> {
+ use jacquard_common::IntoStatic;
+ use jacquard_common::types::value::from_data;
use crate::place_wisp::subfs::SubfsRecord;
- use jacquard_common::types::value::from_data;
- use jacquard_common::IntoStatic;
- // Recursively fetch ALL subfs records (including nested ones)
let mut all_subfs_map: HashMap<String, crate::place_wisp::subfs::Directory> = HashMap::new();
let mut to_fetch = subfs_utils::extract_subfs_uris(directory, String::new());
···
}
println!("Found {} subfs records, fetching recursively...", to_fetch.len());
- let client = reqwest::Client::new();
- // Keep fetching until we've resolved all subfs (including nested ones)
let mut iteration = 0;
- const MAX_ITERATIONS: usize = 10; // Prevent infinite loops
+ const MAX_ITERATIONS: usize = 10;
while !to_fetch.is_empty() && iteration < MAX_ITERATIONS {
    iteration += 1;
···
let pds_url = pds_url.clone();
fetch_tasks.push(async move {
+     // Parse URI
    let parts: Vec<&str> = uri.trim_start_matches("at://").split('/').collect();
    if parts.len() < 3 {
        return Err(miette::miette!("Invalid subfs URI: {}", uri));
    }
-     let _did = parts[0];
+     let did_str = parts[0];
    let collection = parts[1];
-     let rkey = parts[2];
+     let rkey_str = parts[2];
    if collection != "place.wisp.subfs" {
        return Err(miette::miette!("Expected place.wisp.subfs collection, got: {}", collection));
    }
+     // Fetch using GetRecord
    use jacquard::api::com_atproto::repo::get_record::GetRecord;
-     use jacquard_common::types::string::Rkey as RkeyType;
+     use jacquard_common::types::string::{Rkey as RkeyType, Did as DidType, RecordKey};
    use jacquard_common::types::ident::AtIdentifier;
-     use jacquard_common::types::string::{RecordKey, Did as DidType};
-     let rkey_parsed = RkeyType::new(rkey).into_diagnostic()?;
-     let did_parsed = DidType::new(_did).into_diagnostic()?;
+     let rkey_parsed = RkeyType::new(rkey_str).into_diagnostic()?;
+     let did_parsed = DidType::new(did_str).into_diagnostic()?;
    let request = GetRecord::new()
        .repo(AtIdentifier::Did(did_parsed))
···
let record_output = response.into_output().into_diagnostic()?;
let subfs_record: SubfsRecord = from_data(&record_output.value).into_diagnostic()?;
- let subfs_record_static = subfs_record.into_static();
- Ok::<_, miette::Report>((path, subfs_record_static))
+ Ok::<_, miette::Report>((path, subfs_record.into_static()))
});
}
let results: Vec<_> = futures::future::join_all(fetch_tasks).await;
// Process results and find nested subfs
- let mut newly_fetched = Vec::new();
+ let mut newly_found_uris = Vec::new();
for result in results {
    match result {
        Ok((path, record)) => {
            println!(" ✓ Fetched subfs at {}", path);
-             // Check for nested subfs in this record
-             let nested_subfs = extract_subfs_from_subfs_dir(&record.root, path.clone());
-             newly_fetched.extend(nested_subfs);
+             // Extract nested subfs URIs
+             let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, path.clone());
+             newly_found_uris.extend(nested_uris);
            all_subfs_map.insert(path, record.root);
        }
···
    }
}
- // Update to_fetch with only the NEW subfs we haven't fetched yet
- to_fetch = newly_fetched
+ // Filter out already-fetched paths
+ to_fetch = newly_found_uris
    .into_iter()
-     .filter(|(uri, _)| !all_subfs_map.iter().any(|(k, _)| k == uri))
+     .filter(|(_, path)| !all_subfs_map.contains_key(path))
    .collect();
}
if iteration >= MAX_ITERATIONS {
-     return Err(miette::miette!("Max iterations reached while fetching nested subfs"));
+     eprintln!("⚠️ Max iterations reached while fetching nested subfs");
}
println!(" Total subfs records fetched: {}", all_subfs_map.len());
···
Ok(replace_subfs_with_content(directory.clone(), &all_subfs_map, String::new()))
}
- /// Extract subfs URIs from a subfs::Directory
- fn extract_subfs_from_subfs_dir(
+ /// Extract subfs URIs from a subfs::Directory (helper for pull)
+ fn extract_subfs_uris_from_subfs_dir(
    directory: &crate::place_wisp::subfs::Directory,
    current_path: String,
) -> Vec<(String, String)> {
···
            uris.push((subfs_node.subject.to_string(), full_path.clone()));
        }
        crate::place_wisp::subfs::EntryNode::Directory(subdir) => {
-             let nested = extract_subfs_from_subfs_dir(subdir, full_path);
+             let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path);
            uris.extend(nested);
        }
        _ => {}
cli/src/subfs_utils.rs  +195 -34
···
Ok(record_output.value.into_static())
}
- /// Merge blob maps from subfs records into the main blob map
- /// Returns the total number of blobs merged from all subfs records
- pub async fn merge_subfs_blob_maps(
+ /// Recursively fetch all subfs records (including nested ones)
+ /// Returns a list of (mount_path, SubfsRecord) tuples
+ /// Note: Multiple records can have the same mount_path (for flat-merged chunks)
+ pub async fn fetch_all_subfs_records_recursive(
    agent: &Agent<impl AgentSession + IdentityResolver>,
-     subfs_uris: Vec<(String, String)>,
-     main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
- ) -> miette::Result<usize> {
-     let mut total_merged = 0;
+     initial_uris: Vec<(String, String)>,
+ ) -> miette::Result<Vec<(String, SubfsRecord<'static>)>> {
+     use futures::stream::{self, StreamExt};
-     println!("Fetching {} subfs records for blob reuse...", subfs_uris.len());
+     let mut all_subfs: Vec<(String, SubfsRecord<'static>)> = Vec::new();
+     let mut fetched_uris: std::collections::HashSet<String> = std::collections::HashSet::new();
+     let mut to_fetch = initial_uris;
-     // Fetch all subfs records in parallel (but with some concurrency limit)
-     use futures::stream::{self, StreamExt};
+     if to_fetch.is_empty() {
+         return Ok(all_subfs);
+     }
+
+     println!("Found {} subfs records, fetching recursively...", to_fetch.len());
+
+     let mut iteration = 0;
+     const MAX_ITERATIONS: usize = 10;
-     let subfs_results: Vec<_> = stream::iter(subfs_uris)
-         .map(|(uri, mount_path)| async move {
-             match fetch_subfs_record(agent, &uri).await {
-                 Ok(record) => Some((record, mount_path)),
-                 Err(e) => {
-                     eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e);
-                     None
+     while !to_fetch.is_empty() && iteration < MAX_ITERATIONS {
+         iteration += 1;
+         println!(" Iteration {}: fetching {} subfs records...", iteration, to_fetch.len());
+
+         let subfs_results: Vec<_> = stream::iter(to_fetch.clone())
+             .map(|(uri, mount_path)| async move {
+                 match fetch_subfs_record(agent, &uri).await {
+                     Ok(record) => Some((mount_path, record, uri)),
+                     Err(e) => {
+                         eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e);
+                         None
+                     }
                }
+             })
+             .buffer_unordered(5)
+             .collect()
+             .await;
+
+         // Process results and find nested subfs
+         let mut newly_found_uris = Vec::new();
+         for result in subfs_results {
+             if let Some((mount_path, record, uri)) = result {
+                 println!(" ✓ Fetched subfs at {}", mount_path);
+
+                 // Extract nested subfs URIs from this record
+                 let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, mount_path.clone());
+                 newly_found_uris.extend(nested_uris);
+
+                 all_subfs.push((mount_path, record));
+                 fetched_uris.insert(uri);
            }
-         })
-         .buffer_unordered(5)
-         .collect()
-         .await;
+         }
-     // Convert subfs Directory to fs Directory for blob extraction
-     // Note: We need to extract blobs from the subfs record's root
-     for result in subfs_results {
-         if let Some((subfs_record, mount_path)) = result {
-             // Extract blobs from this subfs record's root
-             // The blob_map module works with fs::Directory, but subfs::Directory has the same structure
-             // We need to convert or work directly with the entries
+         // Filter out already-fetched URIs (based on URI, not path)
+         to_fetch = newly_found_uris
+             .into_iter()
+             .filter(|(uri, _)| !fetched_uris.contains(uri))
+             .collect();
+     }
-             let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
-             let count = subfs_blob_map.len();
+     if iteration >= MAX_ITERATIONS {
+         eprintln!("⚠️ Max iterations reached while fetching nested subfs");
+     }
-             for (path, blob_info) in subfs_blob_map {
-                 main_blob_map.insert(path, blob_info);
+     println!(" Total subfs records fetched: {}", all_subfs.len());
+
+     Ok(all_subfs)
+ }
+
+ /// Extract subfs URIs from a subfs::Directory
+ fn extract_subfs_uris_from_subfs_dir(
+     directory: &crate::place_wisp::subfs::Directory,
+     current_path: String,
+ ) -> Vec<(String, String)> {
+     let mut uris = Vec::new();
+
+     for entry in &directory.entries {
+         match &entry.node {
+             crate::place_wisp::subfs::EntryNode::Subfs(subfs_node) => {
+                 // Check if this is a chunk entry (chunk0, chunk1, etc.)
+                 // Chunks should be flat-merged, so use the parent's path
+                 let mount_path = if entry.name.starts_with("chunk") &&
+                     entry.name.chars().skip(5).all(|c| c.is_ascii_digit()) {
+                     // This is a chunk - use parent's path for flat merge
+                     println!(" → Found chunk {} at {}, will flat-merge to {}", entry.name, current_path, current_path);
+                     current_path.clone()
+                 } else {
+                     // Normal subfs - append name to path
+                     if current_path.is_empty() {
+                         entry.name.to_string()
+                     } else {
+                         format!("{}/{}", current_path, entry.name)
+                     }
+                 };
+
+                 uris.push((subfs_node.subject.to_string(), mount_path));
            }
+             crate::place_wisp::subfs::EntryNode::Directory(subdir) => {
+                 let full_path = if current_path.is_empty() {
+                     entry.name.to_string()
+                 } else {
+                     format!("{}/{}", current_path, entry.name)
+                 };
+                 let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path);
+                 uris.extend(nested);
+             }
+             _ => {}
+         }
+     }
- total_merged += count;
- println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path);
+     uris
+ }
+
+ /// Merge blob maps from subfs records into the main blob map (RECURSIVE)
+ /// Returns the total number of blobs merged from all subfs records
+ pub async fn merge_subfs_blob_maps(
+     agent: &Agent<impl AgentSession + IdentityResolver>,
+     subfs_uris: Vec<(String, String)>,
+     main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
+ ) -> miette::Result<usize> {
+     // Fetch all subfs records recursively
+     let all_subfs = fetch_all_subfs_records_recursive(agent, subfs_uris).await?;
+
+     let mut total_merged = 0;
+
+     // Extract blobs from all fetched subfs records
+     // Skip parent records that only contain chunk references (no actual files)
+     for (mount_path, subfs_record) in all_subfs {
+         // Check if this record only contains chunk subfs references (no files)
+         let only_has_chunks = subfs_record.root.entries.iter().all(|e| {
+             matches!(&e.node, crate::place_wisp::subfs::EntryNode::Subfs(_)) &&
+             e.name.starts_with("chunk") &&
+             e.name.chars().skip(5).all(|c| c.is_ascii_digit())
+         });
+
+         if only_has_chunks && !subfs_record.root.entries.is_empty() {
+             // This is a parent containing only chunks - skip it, blobs are in the chunks
+             println!(" → Skipping parent subfs at {} ({} chunks, no files)", mount_path, subfs_record.root.entries.len());
+             continue;
+         }
+
+         let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
+         let count = subfs_blob_map.len();
+
+         for (path, blob_info) in subfs_blob_map {
+             main_blob_map.insert(path, blob_info);
        }
+
+         total_merged += count;
+         println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path);
    }
    Ok(total_merged)
···
Ok(())
}
+
+ /// Split a large directory into multiple smaller chunks
+ /// Returns a list of chunk directories, each small enough to fit in a subfs record
+ pub fn split_directory_into_chunks(
+     directory: &FsDirectory,
+     max_size: usize,
+ ) -> Vec<FsDirectory<'static>> {
+     use jacquard_common::CowStr;
+
+     let mut chunks = Vec::new();
+     let mut current_chunk_entries = Vec::new();
+     let mut current_chunk_size = 100; // Base size for directory structure
+
+     for entry in &directory.entries {
+         // Estimate the size of this entry
+         let entry_size = estimate_entry_size(entry);
+
+         // If adding this entry would exceed the max size, start a new chunk
+         if !current_chunk_entries.is_empty() && (current_chunk_size + entry_size > max_size) {
+             // Create a chunk from current entries
+             let chunk = FsDirectory::new()
+                 .r#type(CowStr::from("directory"))
+                 .entries(current_chunk_entries.clone())
+                 .build();
+
+             chunks.push(chunk);
+
+             // Start new chunk
+             current_chunk_entries.clear();
+             current_chunk_size = 100;
+         }
+
+         current_chunk_entries.push(entry.clone().into_static());
+         current_chunk_size += entry_size;
+     }
+
+     // Add the last chunk if it has any entries
+     if !current_chunk_entries.is_empty() {
+         let chunk = FsDirectory::new()
+             .r#type(CowStr::from("directory"))
+             .entries(current_chunk_entries)
+             .build();
+         chunks.push(chunk);
+     }
+
+     chunks
+ }
+
+ /// Estimate the JSON size of a single entry
+ fn estimate_entry_size(entry: &crate::place_wisp::fs::Entry) -> usize {
+     match serde_json::to_string(entry) {
+         Ok(json) => json.len(),
+         Err(_) => 500, // Conservative estimate if serialization fails
+     }
+ }