Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

Add support for existing blob reuse in deployment process

+1
cli/.gitignore
···
+
test/
.DS_STORE
jacquard/
binaries/
+3
cli/Cargo.lock
···
"jacquard-oauth",
"miette",
"mime_guess",
+
"multibase",
+
"multihash",
"reqwest",
"rustversion",
"serde",
"serde_json",
+
"sha2",
"shellexpand",
"tokio",
"walkdir",
+3
cli/Cargo.toml
···
mime_guess = "2.0"
bytes = "1.10"
futures = "0.3.31"
+
multihash = "0.19.3"
+
multibase = "0.9"
+
sha2 = "0.10"
+92
cli/src/blob_map.rs
···
+
use jacquard_common::types::blob::BlobRef;
+
use jacquard_common::IntoStatic;
+
use std::collections::HashMap;
+
+
use crate::place_wisp::fs::{Directory, EntryNode};
+
+
/// Extract blob information from a directory tree
+
/// Returns a map of file paths to their blob refs and CIDs
+
///
+
/// This mirrors the TypeScript implementation in src/lib/wisp-utils.ts lines 275-302
+
pub fn extract_blob_map(
+
directory: &Directory,
+
) -> HashMap<String, (BlobRef<'static>, String)> {
+
extract_blob_map_recursive(directory, String::new())
+
}
+
+
fn extract_blob_map_recursive(
+
directory: &Directory,
+
current_path: String,
+
) -> HashMap<String, (BlobRef<'static>, String)> {
+
let mut blob_map = HashMap::new();
+
+
for entry in &directory.entries {
+
let full_path = if current_path.is_empty() {
+
entry.name.to_string()
+
} else {
+
format!("{}/{}", current_path, entry.name)
+
};
+
+
match &entry.node {
+
EntryNode::File(file_node) => {
+
// Extract CID from blob ref
+
// BlobRef is an enum with Blob variant, which has a ref field (CidLink)
+
let blob_ref = &file_node.blob;
+
let cid_string = blob_ref.blob().r#ref.to_string();
+
+
// Store both normalized and full paths
+
// Normalize by removing base folder prefix (e.g., "cobblemon/index.html" -> "index.html")
+
let normalized_path = normalize_path(&full_path);
+
+
blob_map.insert(
+
normalized_path.clone(),
+
(blob_ref.clone().into_static(), cid_string.clone())
+
);
+
+
// Also store the full path for matching
+
if normalized_path != full_path {
+
blob_map.insert(
+
full_path,
+
(blob_ref.clone().into_static(), cid_string)
+
);
+
}
+
}
+
EntryNode::Directory(subdir) => {
+
let sub_map = extract_blob_map_recursive(subdir, full_path);
+
blob_map.extend(sub_map);
+
}
+
EntryNode::Unknown(_) => {
+
// Skip unknown node types
+
}
+
}
+
}
+
+
blob_map
+
}
+
+
/// Normalize a file path by removing the base folder prefix.
///
/// Everything up to and including the first `/` is dropped; a path without
/// any `/` is returned unchanged.
///
/// Examples:
/// - `"cobblemon/index.html"` -> `"index.html"`
/// - `"a/b/c.txt"` -> `"b/c.txt"`
/// - `"index.html"` -> `"index.html"`
/// - `"dir/"` -> `""`
///
/// Mirrors TypeScript implementation at src/routes/wisp.ts line 291
pub fn normalize_path(path: &str) -> String {
    // `split_once` splits at the first '/', so no manual byte-index slicing is needed.
    path.split_once('/')
        .map_or(path, |(_base, rest)| rest)
        .to_string()
}
+
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `normalize_path`, including inputs with no
    /// separator, a trailing separator and a leading separator.
    #[test]
    fn test_normalize_path() {
        let cases = [
            // No separator: returned unchanged.
            ("index.html", "index.html"),
            ("", ""),
            // Only the first component is removed.
            ("cobblemon/index.html", "index.html"),
            ("folder/subfolder/file.txt", "subfolder/file.txt"),
            ("a/b/c/d.txt", "b/c/d.txt"),
            // Separator at either end.
            ("dir/", ""),
            ("/file", "file"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(normalize_path(input), expected, "input: {:?}", input);
        }
    }
}
+
+66
cli/src/cid.rs
···
+
use jacquard_common::types::cid::IpldCid;
+
use sha2::{Digest, Sha256};
+
+
/// Compute CID (Content Identifier) for blob content
+
/// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256
+
///
+
/// CRITICAL: This must be called on BASE64-ENCODED GZIPPED content, not just gzipped content
+
///
+
/// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation
+
pub fn compute_cid(content: &[u8]) -> String {
+
// Use node crypto to compute sha256 hash (same as AT Protocol)
+
let hash = Sha256::digest(content);
+
+
// Create multihash (code 0x12 = sha2-256)
+
let multihash = multihash::Multihash::wrap(0x12, &hash)
+
.expect("SHA-256 hash should always fit in multihash");
+
+
// Create CIDv1 with raw codec (0x55)
+
let cid = IpldCid::new_v1(0x55, multihash);
+
+
// Convert to base32 string representation
+
cid.to_string_of_base(multibase::Base::Base32Lower)
+
.unwrap_or_else(|_| cid.to_string())
+
}
+
+
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine;

    /// CIDv1 + raw codec + sha2-256 + 32-byte digest always encodes to this
    /// prefix in lowercase base32 (multibase 'b' followed by bytes 01 55 12 20).
    const RAW_SHA256_PREFIX: &str = "bafkrei";

    /// 36 bytes (4 header + 32 digest) -> 58 base32 chars, plus the multibase prefix.
    const RAW_SHA256_CID_LEN: usize = 59;

    #[test]
    fn test_compute_cid() {
        // Known vector: sha256("hello") =
        // 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
        let cid = compute_cid(b"hello");

        assert_eq!(
            cid,
            "bafkreibm6jg3ux5qumhcn2b3flc3tyu6dmlb4xa7u5bf44yegnrjhc4yeq"
        );
    }

    #[test]
    fn test_compute_cid_is_deterministic_and_content_sensitive() {
        assert_eq!(compute_cid(b"hello"), compute_cid(b"hello"));
        assert_ne!(compute_cid(b"hello"), compute_cid(b"hello!"));
    }

    #[test]
    fn test_compute_cid_base64_encoded() {
        // Simulate the actual use case: gzipped then base64 encoded
        use flate2::write::GzEncoder;
        use flate2::Compression;
        use std::io::Write;

        let original = b"hello world";

        // Gzip compress
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(original).unwrap();
        let gzipped = encoder.finish().unwrap();

        // Base64 encode the gzipped data
        let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();

        // Compute CID on the base64 bytes
        let cid = compute_cid(&base64_bytes);

        // Must be a raw-codec sha2-256 CIDv1, not merely "some CID".
        assert!(cid.starts_with(RAW_SHA256_PREFIX));
        assert_eq!(cid.len(), RAW_SHA256_CID_LEN);

        // The CID must describe the encoded bytes, not the original content.
        assert_ne!(cid, compute_cid(original));
    }
}
+
+121 -38
cli/src/main.rs
···
mod builder_types;
mod place_wisp;
+
mod cid;
+
mod blob_map;
use clap::Parser;
use jacquard::CowStr;
-
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession};
+
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
use jacquard::oauth::client::OAuthClient;
use jacquard::oauth::loopback::LoopbackConfig;
use jacquard::prelude::IdentityResolver;
···
use jacquard_common::types::blob::MimeType;
use miette::IntoDiagnostic;
use std::path::{Path, PathBuf};
+
use std::collections::HashMap;
use flate2::Compression;
use flate2::write::GzEncoder;
use std::io::Write;
···
println!("Deploying site '{}'...", site_name);
-
// Build directory tree
-
let root_dir = build_directory(agent, &path).await?;
+
// Try to fetch existing manifest for incremental updates
+
let existing_blob_map: HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)> = {
+
use jacquard_common::types::string::AtUri;
+
+
// Get the DID for this session
+
let session_info = agent.session_info().await;
+
if let Some((did, _)) = session_info {
+
// Construct the AT URI for the record
+
let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name);
+
if let Ok(uri) = AtUri::new(&uri_string) {
+
match agent.get_record::<Fs>(&uri).await {
+
Ok(response) => {
+
match response.into_output() {
+
Ok(record_output) => {
+
let existing_manifest = record_output.value;
+
let blob_map = blob_map::extract_blob_map(&existing_manifest.root);
+
println!("Found existing manifest with {} files, checking for changes...", blob_map.len());
+
blob_map
+
}
+
Err(_) => {
+
println!("No existing manifest found, uploading all files...");
+
HashMap::new()
+
}
+
}
+
}
+
Err(_) => {
+
// Record doesn't exist yet - this is a new site
+
println!("No existing manifest found, uploading all files...");
+
HashMap::new()
+
}
+
}
+
} else {
+
println!("No existing manifest found (invalid URI), uploading all files...");
+
HashMap::new()
+
}
+
} else {
+
println!("No existing manifest found (could not get DID), uploading all files...");
+
HashMap::new()
+
}
+
};
-
// Count total files
-
let file_count = count_files(&root_dir);
+
// Build directory tree
+
let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map).await?;
+
let uploaded_count = total_files - reused_count;
// Create the Fs record
let fs_record = Fs::new()
.site(CowStr::from(site_name.clone()))
.root(root_dir)
-
.file_count(file_count as i64)
+
.file_count(total_files as i64)
.created_at(Datetime::now())
.build();
···
.and_then(|s| s.split('/').next())
.ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?;
-
println!("Deployed site '{}': {}", site_name, output.uri);
-
println!("Available at: https://sites.wisp.place/{}/{}", did, site_name);
+
println!("\n✓ Deployed site '{}': {}", site_name, output.uri);
+
println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count);
+
println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name);
Ok(())
}
···
fn build_directory<'a>(
agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>,
dir_path: &'a Path,
-
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<Directory<'static>>> + 'a>>
+
existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
+
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>>
{
Box::pin(async move {
// Collect all directory entries first
···
}
// Process files concurrently with a limit of 5
-
let file_entries: Vec<Entry> = stream::iter(file_tasks)
+
let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks)
.map(|(name, path)| async move {
-
let file_node = process_file(agent, &path).await?;
-
Ok::<_, miette::Report>(Entry::new()
+
let (file_node, reused) = process_file(agent, &path, &name, existing_blobs).await?;
+
let entry = Entry::new()
.name(CowStr::from(name))
.node(EntryNode::File(Box::new(file_node)))
-
.build())
+
.build();
+
Ok::<_, miette::Report>((entry, reused))
})
.buffer_unordered(5)
.collect::<Vec<_>>()
.await
.into_iter()
.collect::<miette::Result<Vec<_>>>()?;
+
+
let mut file_entries = Vec::new();
+
let mut reused_count = 0;
+
let mut total_files = 0;
+
+
for (entry, reused) in file_results {
+
file_entries.push(entry);
+
total_files += 1;
+
if reused {
+
reused_count += 1;
+
}
+
}
// Process directories recursively (sequentially to avoid too much nesting)
let mut dir_entries = Vec::new();
for (name, path) in dir_tasks {
-
let subdir = build_directory(agent, &path).await?;
+
let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs).await?;
dir_entries.push(Entry::new()
.name(CowStr::from(name))
.node(EntryNode::Directory(Box::new(subdir)))
.build());
+
total_files += sub_total;
+
reused_count += sub_reused;
}
// Combine file and directory entries
let mut entries = file_entries;
entries.extend(dir_entries);
-
Ok(Directory::new()
+
let directory = Directory::new()
.r#type(CowStr::from("directory"))
.entries(entries)
-
.build())
+
.build();
+
+
Ok((directory, total_files, reused_count))
})
}
-
/// Process a single file: gzip -> base64 -> upload blob
+
/// Process a single file: gzip -> base64 -> upload blob (or reuse existing)
+
/// Returns (File, reused: bool)
async fn process_file(
agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>,
file_path: &Path,
-
) -> miette::Result<File<'static>>
+
file_name: &str,
+
existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
+
) -> miette::Result<(File<'static>, bool)>
{
// Read file
let file_data = std::fs::read(file_path).into_diagnostic()?;
···
// Base64 encode the gzipped data
let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
-
// Upload blob as octet-stream
+
// Compute CID for this file (CRITICAL: on base64-encoded gzipped content)
+
let file_cid = cid::compute_cid(&base64_bytes);
+
+
// Normalize the file path for comparison
+
let normalized_path = blob_map::normalize_path(file_name);
+
+
// Check if we have an existing blob with the same CID
+
let existing_blob = existing_blobs.get(&normalized_path)
+
.or_else(|| existing_blobs.get(file_name));
+
+
if let Some((existing_blob_ref, existing_cid)) = existing_blob {
+
if existing_cid == &file_cid {
+
// CIDs match - reuse existing blob
+
println!(" ✓ Reusing blob for {} (CID: {})", file_name, file_cid);
+
return Ok((
+
File::new()
+
.r#type(CowStr::from("file"))
+
.blob(existing_blob_ref.clone())
+
.encoding(CowStr::from("gzip"))
+
.mime_type(CowStr::from(original_mime))
+
.base64(true)
+
.build(),
+
true
+
));
+
}
+
}
+
+
// File is new or changed - upload it
+
println!(" ↑ Uploading {} ({} bytes, CID: {})", file_name, base64_bytes.len(), file_cid);
let blob = agent.upload_blob(
base64_bytes,
MimeType::new_static("application/octet-stream"),
).await?;
-
Ok(File::new()
-
.r#type(CowStr::from("file"))
-
.blob(blob)
-
.encoding(CowStr::from("gzip"))
-
.mime_type(CowStr::from(original_mime))
-
.base64(true)
-
.build())
+
Ok((
+
File::new()
+
.r#type(CowStr::from("file"))
+
.blob(blob)
+
.encoding(CowStr::from("gzip"))
+
.mime_type(CowStr::from(original_mime))
+
.base64(true)
+
.build(),
+
false
+
))
}
-
/// Count total files in a directory tree
-
fn count_files(dir: &Directory) -> usize {
-
let mut count = 0;
-
for entry in &dir.entries {
-
match &entry.node {
-
EntryNode::File(_) => count += 1,
-
EntryNode::Directory(subdir) => count += count_files(subdir),
-
_ => {} // Unknown variants
-
}
-
}
-
count
-
}