···
        Ok(record_output.value.into_static())

 75  -  /// Merge blob maps from subfs records into the main blob map
 76  -  /// Returns the total number of blobs merged from all subfs records
 77  -  pub async fn merge_subfs_blob_maps(
 75  +  /// Recursively fetch all subfs records (including nested ones)
 76  +  /// Returns a list of (mount_path, SubfsRecord) tuples
 77  +  /// Note: Multiple records can have the same mount_path (for flat-merged chunks)
 78  +  pub async fn fetch_all_subfs_records_recursive(
            agent: &Agent<impl AgentSession + IdentityResolver>,
 79  -      subfs_uris: Vec<(String, String)>,
 80  -      main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
 81  -  ) -> miette::Result<usize> {
 82  -      let mut total_merged = 0;
 80  +      initial_uris: Vec<(String, String)>,
 81  +  ) -> miette::Result<Vec<(String, SubfsRecord<'static>)>> {
 82  +      use futures::stream::{self, StreamExt};
 84  -      println!("Fetching {} subfs records for blob reuse...", subfs_uris.len());
 84  +      let mut all_subfs: Vec<(String, SubfsRecord<'static>)> = Vec::new();
 85  +      let mut fetched_uris: std::collections::HashSet<String> = std::collections::HashSet::new();
 86  +      let mut to_fetch = initial_uris;
 86  -      // Fetch all subfs records in parallel (but with some concurrency limit)
 87  -      use futures::stream::{self, StreamExt};
 88  +      if to_fetch.is_empty() {
 89  +          return Ok(all_subfs);
 90  +      }
 92  +      println!("Found {} subfs records, fetching recursively...", to_fetch.len());
 94  +      let mut iteration = 0;
 95  +      const MAX_ITERATIONS: usize = 10;
 89  -      let subfs_results: Vec<_> = stream::iter(subfs_uris)
 90  -          .map(|(uri, mount_path)| async move {
 91  -              match fetch_subfs_record(agent, &uri).await {
 92  -                  Ok(record) => Some((record, mount_path)),
 93  -                  Err(e) => {
 94  -                      eprintln!("  ⚠️ Failed to fetch subfs {}: {}", uri, e);
 97  +      while !to_fetch.is_empty() && iteration < MAX_ITERATIONS {
 99  +          println!("  Iteration {}: fetching {} subfs records...", iteration, to_fetch.len());
101  +          let subfs_results: Vec<_> = stream::iter(to_fetch.clone())
102  +              .map(|(uri, mount_path)| async move {
103  +                  match fetch_subfs_record(agent, &uri).await {
104  +                      Ok(record) => Some((mount_path, record, uri)),
105  +                      Err(e) => {
106  +                          eprintln!("  ⚠️ Failed to fetch subfs {}: {}", uri, e);
111  +              .buffer_unordered(5)
115  +          // Process results and find nested subfs
116  +          let mut newly_found_uris = Vec::new();
117  +          for result in subfs_results {
118  +              if let Some((mount_path, record, uri)) = result {
119  +                  println!("    ✓ Fetched subfs at {}", mount_path);
121  +                  // Extract nested subfs URIs from this record
122  +                  let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, mount_path.clone());
123  +                  newly_found_uris.extend(nested_uris);
125  +                  all_subfs.push((mount_path, record));
126  +                  fetched_uris.insert(uri);
 99  -          .buffer_unordered(5)
103  -      // Convert subfs Directory to fs Directory for blob extraction
104  -      // Note: We need to extract blobs from the subfs record's root
105  -      for result in subfs_results {
106  -          if let Some((subfs_record, mount_path)) = result {
107  -              // Extract blobs from this subfs record's root
108  -              // The blob_map module works with fs::Directory, but subfs::Directory has the same structure
109  -              // We need to convert or work directly with the entries
130  +          // Filter out already-fetched URIs (based on URI, not path)
131  +          to_fetch = newly_found_uris
133  +              .filter(|(uri, _)| !fetched_uris.contains(uri))
111  -              let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
112  -              let count = subfs_blob_map.len();
137  +      if iteration >= MAX_ITERATIONS {
138  +          eprintln!("⚠️ Max iterations reached while fetching nested subfs");
114  -              for (path, blob_info) in subfs_blob_map {
115  -                  main_blob_map.insert(path, blob_info);
141  +      println!("  Total subfs records fetched: {}", all_subfs.len());
146  +  /// Extract subfs URIs from a subfs::Directory
147  +  fn extract_subfs_uris_from_subfs_dir(
148  +      directory: &crate::place_wisp::subfs::Directory,
149  +      current_path: String,
150  +  ) -> Vec<(String, String)> {
151  +      let mut uris = Vec::new();
153  +      for entry in &directory.entries {
154  +          match &entry.node {
155  +              crate::place_wisp::subfs::EntryNode::Subfs(subfs_node) => {
156  +                  // Check if this is a chunk entry (chunk0, chunk1, etc.)
157  +                  // Chunks should be flat-merged, so use the parent's path
158  +                  let mount_path = if entry.name.starts_with("chunk") &&
159  +                      entry.name.chars().skip(5).all(|c| c.is_ascii_digit()) {
160  +                      // This is a chunk - use parent's path for flat merge
161  +                      println!("    → Found chunk {} at {}, will flat-merge to {}", entry.name, current_path, current_path);
162  +                      current_path.clone()
163  +                  } else {
164  +                      // Normal subfs - append name to path
165  +                      if current_path.is_empty() {
166  +                          entry.name.to_string()
167  +                      } else {
168  +                          format!("{}/{}", current_path, entry.name)
172  +                  uris.push((subfs_node.subject.to_string(), mount_path));
174  +              crate::place_wisp::subfs::EntryNode::Directory(subdir) => {
175  +                  let full_path = if current_path.is_empty() {
176  +                      entry.name.to_string()
177  +                  } else {
178  +                      format!("{}/{}", current_path, entry.name)
180  +                  let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path);
181  +                  uris.extend(nested);
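The chunk test — a `chunk` prefix followed only by ASCII digits — appears here and again in `merge_subfs_blob_maps` below. A sketch of factoring it into a shared helper; the name `is_chunk_entry` is ours, not from the diff:

    /// Hypothetical helper matching the diff's predicate: true for "chunk0" or
    /// "chunk42", false for "chunks" or "chunk0a". Note that a bare "chunk"
    /// also matches, because `all` over an empty iterator returns true.
    fn is_chunk_entry(name: &str) -> bool {
        name.strip_prefix("chunk")
            .map(|rest| rest.chars().all(|c| c.is_ascii_digit()))
            .unwrap_or(false)
    }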
118  -              total_merged += count;
119  -              println!("    ✓ Merged {} blobs from subfs at {}", count, mount_path);
190  +  /// Merge blob maps from subfs records into the main blob map (RECURSIVE)
191  +  /// Returns the total number of blobs merged from all subfs records
192  +  pub async fn merge_subfs_blob_maps(
193  +      agent: &Agent<impl AgentSession + IdentityResolver>,
194  +      subfs_uris: Vec<(String, String)>,
195  +      main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
196  +  ) -> miette::Result<usize> {
197  +      // Fetch all subfs records recursively
198  +      let all_subfs = fetch_all_subfs_records_recursive(agent, subfs_uris).await?;
200  +      let mut total_merged = 0;
202  +      // Extract blobs from all fetched subfs records
203  +      // Skip parent records that only contain chunk references (no actual files)
204  +      for (mount_path, subfs_record) in all_subfs {
205  +          // Check if this record only contains chunk subfs references (no files)
206  +          let only_has_chunks = subfs_record.root.entries.iter().all(|e| {
207  +              matches!(&e.node, crate::place_wisp::subfs::EntryNode::Subfs(_)) &&
208  +                  e.name.starts_with("chunk") &&
209  +                  e.name.chars().skip(5).all(|c| c.is_ascii_digit())
212  +          if only_has_chunks && !subfs_record.root.entries.is_empty() {
213  +              // This is a parent containing only chunks - skip it, blobs are in the chunks
214  +              println!("    → Skipping parent subfs at {} ({} chunks, no files)", mount_path, subfs_record.root.entries.len());
218  +          let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
219  +          let count = subfs_blob_map.len();
221  +          for (path, blob_info) in subfs_blob_map {
222  +              main_blob_map.insert(path, blob_info);
225  +          total_merged += count;
226  +          println!("    ✓ Merged {} blobs from subfs at {}", count, mount_path);
···
444  +  /// Split a large directory into multiple smaller chunks
445  +  /// Returns a list of chunk directories, each small enough to fit in a subfs record
446  +  pub fn split_directory_into_chunks(
447  +      directory: &FsDirectory,
448  +      max_size: usize,
449  +  ) -> Vec<FsDirectory<'static>> {
450  +      use jacquard_common::CowStr;
452  +      let mut chunks = Vec::new();
453  +      let mut current_chunk_entries = Vec::new();
454  +      let mut current_chunk_size = 100; // Base size for directory structure
456  +      for entry in &directory.entries {
457  +          // Estimate the size of this entry
458  +          let entry_size = estimate_entry_size(entry);
460  +          // If adding this entry would exceed the max size, start a new chunk
461  +          if !current_chunk_entries.is_empty() && (current_chunk_size + entry_size > max_size) {
462  +              // Create a chunk from current entries
463  +              let chunk = FsDirectory::new()
464  +                  .r#type(CowStr::from("directory"))
465  +                  .entries(current_chunk_entries.clone())
468  +              chunks.push(chunk);
471  +              current_chunk_entries.clear();
472  +              current_chunk_size = 100;
475  +          current_chunk_entries.push(entry.clone().into_static());
476  +          current_chunk_size += entry_size;
479  +      // Add the last chunk if it has any entries
480  +      if !current_chunk_entries.is_empty() {
481  +          let chunk = FsDirectory::new()
482  +              .r#type(CowStr::from("directory"))
483  +              .entries(current_chunk_entries)
485  +          chunks.push(chunk);
491  +  /// Estimate the JSON size of a single entry
492  +  fn estimate_entry_size(entry: &crate::place_wisp::fs::Entry) -> usize {
493  +      match serde_json::to_string(entry) {
494  +          Ok(json) => json.len(),
495  +          Err(_) => 500, // Conservative estimate if serialization fails
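Chunking is greedy: entries accumulate until the estimated JSON size would cross `max_size`, then a new chunk begins, so each chunk stays under the budget (modulo the per-entry estimate and the 100-byte structural allowance). A usage sketch — `MAX_SUBFS_JSON_BYTES` and `big_dir` are assumed names, not taken from this diff:

    // Assumed budget for one subfs record's JSON payload.
    const MAX_SUBFS_JSON_BYTES: usize = 100_000;
    let chunks = split_directory_into_chunks(&big_dir, MAX_SUBFS_JSON_BYTES);
    for (i, chunk) in chunks.iter().enumerate() {
        // Chunks are referenced as "chunk0", "chunk1", ... so the chunk-name
        // predicate above can flat-merge them back at read time.
        let approx = serde_json::to_string(chunk).map(|j| j.len()).unwrap_or(0);
        println!("chunk{}: {} entries, ~{} bytes", i, chunk.entries.len(), approx);
    }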