rudimentary _redirects support, incremental uploading for cli #3

closed
opened by nekomimi.pet targeting main

TODO: support a `_headers` file, and a `place.wisp.settings` lexicon as a lexicon-based way of configuring this

+31 -1
README.md
···
cargo build
```
+
## Features
+
+
### URL Redirects and Rewrites
+
+
The hosting service supports Netlify-style `_redirects` files for managing URLs. Place a `_redirects` file in your site root to enable:
+
+
- **301/302 Redirects**: Permanent and temporary URL redirects
+
- **200 Rewrites**: Serve different content without changing the URL
+
- **404 Custom Pages**: Custom error pages for specific paths
+
- **Splats & Placeholders**: Dynamic path matching (`/blog/:year/:month/:day`, `/news/*`)
+
- **Query Parameter Matching**: Redirect based on URL parameters
+
- **Conditional Redirects**: Route by country, language, or cookie presence
+
- **Force Redirects**: Override existing files with redirects
+
+
Example `_redirects`:
+
```
+
# Single-page app routing (React, Vue, etc.)
+
/* /index.html 200
+
+
# Simple redirects
+
/home /
+
/old-blog/* /blog/:splat
+
+
# API proxy
+
/api/* https://api.example.com/:splat 200
+
+
# Country-based routing
+
/ /us/ 302 Country=us
+
/ /uk/ 302 Country=gb
+
```
+
## Limits
- Max file size: 100MB (PDS limit)
-
- Max site size: 300MB
- Max files: 2000
## Tech Stack
-123
hosting-service/EXAMPLE.md
···
-
# HTML Path Rewriting Example
-
-
This document demonstrates how HTML path rewriting works when serving sites via the `/s/:identifier/:site/*` route.
-
-
## Problem
-
-
When you create a static site with absolute paths like `/style.css` or `/images/logo.png`, these paths work fine when served from the root domain. However, when served from a subdirectory like `/s/alice.bsky.social/mysite/`, these absolute paths break because they resolve to the server root instead of the site root.
-
-
## Solution
-
-
The hosting service automatically rewrites absolute paths in HTML files to work correctly in the subdirectory context.
-
-
## Example
-
-
**Original HTML file (index.html):**
-
```html
-
<!DOCTYPE html>
-
<html>
-
<head>
-
<meta charset="UTF-8">
-
<title>My Site</title>
-
<link rel="stylesheet" href="/style.css">
-
<link rel="icon" href="/favicon.ico">
-
<script src="/app.js"></script>
-
</head>
-
<body>
-
<header>
-
<img src="/images/logo.png" alt="Logo">
-
<nav>
-
<a href="/">Home</a>
-
<a href="/about">About</a>
-
<a href="/contact">Contact</a>
-
</nav>
-
</header>
-
-
<main>
-
<h1>Welcome</h1>
-
<img src="/images/hero.jpg"
-
srcset="/images/hero.jpg 1x, /images/hero@2x.jpg 2x"
-
alt="Hero">
-
-
<form action="/submit" method="post">
-
<input type="text" name="email">
-
<button>Submit</button>
-
</form>
-
</main>
-
-
<footer>
-
<a href="https://example.com">External Link</a>
-
<a href="#top">Back to Top</a>
-
</footer>
-
</body>
-
</html>
-
```
-
-
**When accessed via `/s/alice.bsky.social/mysite/`, the HTML is rewritten to:**
-
```html
-
<!DOCTYPE html>
-
<html>
-
<head>
-
<meta charset="UTF-8">
-
<title>My Site</title>
-
<link rel="stylesheet" href="/s/alice.bsky.social/mysite/style.css">
-
<link rel="icon" href="/s/alice.bsky.social/mysite/favicon.ico">
-
<script src="/s/alice.bsky.social/mysite/app.js"></script>
-
</head>
-
<body>
-
<header>
-
<img src="/s/alice.bsky.social/mysite/images/logo.png" alt="Logo">
-
<nav>
-
<a href="/s/alice.bsky.social/mysite/">Home</a>
-
<a href="/s/alice.bsky.social/mysite/about">About</a>
-
<a href="/s/alice.bsky.social/mysite/contact">Contact</a>
-
</nav>
-
</header>
-
-
<main>
-
<h1>Welcome</h1>
-
<img src="/s/alice.bsky.social/mysite/images/hero.jpg"
-
srcset="/s/alice.bsky.social/mysite/images/hero.jpg 1x, /s/alice.bsky.social/mysite/images/hero@2x.jpg 2x"
-
alt="Hero">
-
-
<form action="/s/alice.bsky.social/mysite/submit" method="post">
-
<input type="text" name="email">
-
<button>Submit</button>
-
</form>
-
</main>
-
-
<footer>
-
<a href="https://example.com">External Link</a>
-
<a href="#top">Back to Top</a>
-
</footer>
-
</body>
-
</html>
-
```
-
-
## What's Preserved
-
-
Notice that:
-
- ✅ Absolute paths are rewritten: `/style.css` → `/s/alice.bsky.social/mysite/style.css`
-
- ✅ External URLs are preserved: `https://example.com` stays the same
-
- ✅ Anchors are preserved: `#top` stays the same
-
- ✅ The rewriting is safe and won't break your site
-
-
## Supported Attributes
-
-
The rewriter handles these HTML attributes:
-
- `src` - images, scripts, iframes, videos, audio
-
- `href` - links, stylesheets
-
- `action` - forms
-
- `data` - objects
-
- `poster` - video posters
-
- `srcset` - responsive images
-
-
## Testing Your Site
-
-
To test if your site works with path rewriting:
-
-
1. Upload your site to your PDS as a `place.wisp.fs` record
-
2. Access it via: `https://hosting.wisp.place/s/YOUR_HANDLE/SITE_NAME/`
-
3. Check that all resources load correctly
-
-
If you're using relative paths already (like `./style.css` or `../images/logo.png`), they'll work without any rewriting.
+134
hosting-service/example-_redirects
···
+
# Example _redirects file for Wisp hosting
+
# Place this file in the root directory of your site as "_redirects"
+
# Lines starting with # are comments
+
+
# ===================================
+
# SIMPLE REDIRECTS
+
# ===================================
+
+
# Redirect home page
+
# /home /
+
+
# Redirect old URLs to new ones
+
# /old-blog /blog
+
# /about-us /about
+
+
# ===================================
+
# SPLAT REDIRECTS (WILDCARDS)
+
# ===================================
+
+
# Redirect entire directories
+
# /news/* /blog/:splat
+
# /old-site/* /new-site/:splat
+
+
# ===================================
+
# PLACEHOLDER REDIRECTS
+
# ===================================
+
+
# Restructure blog URLs
+
# /blog/:year/:month/:day/:slug /posts/:year-:month-:day/:slug
+
+
# Capture multiple parameters
+
# /products/:category/:id /shop/:category/item/:id
+
+
# ===================================
+
# STATUS CODES
+
# ===================================
+
+
# Permanent redirect (301) - default if not specified
+
# /permanent-move /new-location 301
+
+
# Temporary redirect (302)
+
# /temp-redirect /temp-location 302
+
+
# Rewrite (200) - serves different content, URL stays the same
+
# /api/* /functions/:splat 200
+
+
# Custom 404 page
+
# /shop/* /shop-closed.html 404
+
+
# ===================================
+
# FORCE REDIRECTS
+
# ===================================
+
+
# Force redirect even if file exists (note the ! after status code)
+
# /override-file /other-file.html 200!
+
+
# ===================================
+
# CONDITIONAL REDIRECTS
+
# ===================================
+
+
# Country-based redirects (ISO 3166-1 alpha-2 codes)
+
# / /us/ 302 Country=us
+
# / /uk/ 302 Country=gb
+
# / /anz/ 302 Country=au,nz
+
+
# Language-based redirects
+
# /products /en/products 301 Language=en
+
# /products /de/products 301 Language=de
+
# /products /fr/products 301 Language=fr
+
+
# Cookie-based redirects (checks if cookie exists)
+
# /* /legacy/:splat 200 Cookie=is_legacy
+
+
# ===================================
+
# QUERY PARAMETERS
+
# ===================================
+
+
# Match specific query parameters
+
# /store id=:id /blog/:id 301
+
+
# Multiple parameters
+
# /search q=:query category=:cat /find/:cat/:query 301
+
+
# ===================================
+
# DOMAIN-LEVEL REDIRECTS
+
# ===================================
+
+
# Redirect to different domain (must include protocol)
+
# /external https://example.com/path
+
+
# Redirect entire subdomain
+
# http://blog.example.com/* https://example.com/blog/:splat 301!
+
# https://blog.example.com/* https://example.com/blog/:splat 301!
+
+
# ===================================
+
# COMMON PATTERNS
+
# ===================================
+
+
# Remove .html extensions
+
# /page.html /page
+
+
# Add trailing slash
+
# /about /about/
+
+
# Single-page app fallback (serve index.html for all paths)
+
# /* /index.html 200
+
+
# API proxy
+
# /api/* https://api.example.com/:splat 200
+
+
# ===================================
+
# CUSTOM ERROR PAGES
+
# ===================================
+
+
# Language-specific 404 pages
+
# /en/* /en/404.html 404
+
# /de/* /de/404.html 404
+
+
# Section-specific 404 pages
+
# /shop/* /shop/not-found.html 404
+
# /blog/* /blog/404.html 404
+
+
# ===================================
+
# NOTES
+
# ===================================
+
#
+
# - Rules are processed in order (first match wins)
+
# - More specific rules should come before general ones
+
# - Splats (*) can only be used at the end of a path
+
# - Query parameters are automatically preserved for 200, 301, 302
+
# - Trailing slashes are normalized (/ and no / are treated the same)
+
# - Default status code is 301 if not specified
+
#
+
+215
hosting-service/src/lib/redirects.test.ts
···
+
import { describe, it, expect } from 'bun:test'
+
import { parseRedirectsFile, matchRedirectRule } from './redirects';
+
+
describe('parseRedirectsFile', () => {
+
it('should parse simple redirects', () => {
+
const content = `
+
# Comment line
+
/old-path /new-path
+
/home / 301
+
`;
+
const rules = parseRedirectsFile(content);
+
expect(rules).toHaveLength(2);
+
expect(rules[0]).toMatchObject({
+
from: '/old-path',
+
to: '/new-path',
+
status: 301,
+
force: false,
+
});
+
expect(rules[1]).toMatchObject({
+
from: '/home',
+
to: '/',
+
status: 301,
+
force: false,
+
});
+
});
+
+
it('should parse redirects with different status codes', () => {
+
const content = `
+
/temp-redirect /target 302
+
/rewrite /content 200
+
/not-found /404 404
+
`;
+
const rules = parseRedirectsFile(content);
+
expect(rules).toHaveLength(3);
+
expect(rules[0]?.status).toBe(302);
+
expect(rules[1]?.status).toBe(200);
+
expect(rules[2]?.status).toBe(404);
+
});
+
+
it('should parse force redirects', () => {
+
const content = `/force-path /target 301!`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.force).toBe(true);
+
expect(rules[0]?.status).toBe(301);
+
});
+
+
it('should parse splat redirects', () => {
+
const content = `/news/* /blog/:splat`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.from).toBe('/news/*');
+
expect(rules[0]?.to).toBe('/blog/:splat');
+
});
+
+
it('should parse placeholder redirects', () => {
+
const content = `/blog/:year/:month/:day /posts/:year-:month-:day`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.from).toBe('/blog/:year/:month/:day');
+
expect(rules[0]?.to).toBe('/posts/:year-:month-:day');
+
});
+
+
it('should parse country-based redirects', () => {
+
const content = `/ /anz 302 Country=au,nz`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.conditions?.country).toEqual(['au', 'nz']);
+
});
+
+
it('should parse language-based redirects', () => {
+
const content = `/products /en/products 301 Language=en`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.conditions?.language).toEqual(['en']);
+
});
+
+
it('should parse cookie-based redirects', () => {
+
const content = `/* /legacy/:splat 200 Cookie=is_legacy,my_cookie`;
+
const rules = parseRedirectsFile(content);
+
expect(rules[0]?.conditions?.cookie).toEqual(['is_legacy', 'my_cookie']);
+
});
+
});
+
+
describe('matchRedirectRule', () => {
+
it('should match exact paths', () => {
+
const rules = parseRedirectsFile('/old-path /new-path');
+
const match = matchRedirectRule('/old-path', rules);
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/new-path');
+
expect(match?.status).toBe(301);
+
});
+
+
it('should match paths with trailing slash', () => {
+
const rules = parseRedirectsFile('/old-path /new-path');
+
const match = matchRedirectRule('/old-path/', rules);
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/new-path');
+
});
+
+
it('should match splat patterns', () => {
+
const rules = parseRedirectsFile('/news/* /blog/:splat');
+
const match = matchRedirectRule('/news/2024/01/15/my-post', rules);
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/blog/2024/01/15/my-post');
+
});
+
+
it('should match placeholder patterns', () => {
+
const rules = parseRedirectsFile('/blog/:year/:month/:day /posts/:year-:month-:day');
+
const match = matchRedirectRule('/blog/2024/01/15', rules);
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/posts/2024-01-15');
+
});
+
+
it('should preserve query strings for 301/302 redirects', () => {
+
const rules = parseRedirectsFile('/old /new 301');
+
const match = matchRedirectRule('/old', rules, {
+
queryParams: { foo: 'bar', baz: 'qux' },
+
});
+
expect(match?.targetPath).toContain('?');
+
expect(match?.targetPath).toContain('foo=bar');
+
expect(match?.targetPath).toContain('baz=qux');
+
});
+
+
it('should match based on query parameters', () => {
+
const rules = parseRedirectsFile('/store id=:id /blog/:id 301');
+
const match = matchRedirectRule('/store', rules, {
+
queryParams: { id: 'my-post' },
+
});
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toContain('/blog/my-post');
+
});
+
+
it('should not match when query params are missing', () => {
+
const rules = parseRedirectsFile('/store id=:id /blog/:id 301');
+
const match = matchRedirectRule('/store', rules, {
+
queryParams: {},
+
});
+
expect(match).toBeNull();
+
});
+
+
it('should match based on country header', () => {
+
const rules = parseRedirectsFile('/ /aus 302 Country=au');
+
const match = matchRedirectRule('/', rules, {
+
headers: { 'cf-ipcountry': 'AU' },
+
});
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/aus');
+
});
+
+
it('should not match wrong country', () => {
+
const rules = parseRedirectsFile('/ /aus 302 Country=au');
+
const match = matchRedirectRule('/', rules, {
+
headers: { 'cf-ipcountry': 'US' },
+
});
+
expect(match).toBeNull();
+
});
+
+
it('should match based on language header', () => {
+
const rules = parseRedirectsFile('/products /en/products 301 Language=en');
+
const match = matchRedirectRule('/products', rules, {
+
headers: { 'accept-language': 'en-US,en;q=0.9' },
+
});
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/en/products');
+
});
+
+
it('should match based on cookie presence', () => {
+
const rules = parseRedirectsFile('/* /legacy/:splat 200 Cookie=is_legacy');
+
const match = matchRedirectRule('/some-path', rules, {
+
cookies: { is_legacy: 'true' },
+
});
+
expect(match).toBeTruthy();
+
expect(match?.targetPath).toBe('/legacy/some-path');
+
});
+
+
it('should return first matching rule', () => {
+
const content = `
+
/path /first
+
/path /second
+
`;
+
const rules = parseRedirectsFile(content);
+
const match = matchRedirectRule('/path', rules);
+
expect(match?.targetPath).toBe('/first');
+
});
+
+
it('should match more specific rules before general ones', () => {
+
const content = `
+
/jobs/customer-ninja /careers/support
+
/jobs/* /careers/:splat
+
`;
+
const rules = parseRedirectsFile(content);
+
+
const match1 = matchRedirectRule('/jobs/customer-ninja', rules);
+
expect(match1?.targetPath).toBe('/careers/support');
+
+
const match2 = matchRedirectRule('/jobs/developer', rules);
+
expect(match2?.targetPath).toBe('/careers/developer');
+
});
+
+
it('should handle SPA routing pattern', () => {
+
const rules = parseRedirectsFile('/* /index.html 200');
+
+
// Should match any path
+
const match1 = matchRedirectRule('/about', rules);
+
expect(match1).toBeTruthy();
+
expect(match1?.targetPath).toBe('/index.html');
+
expect(match1?.status).toBe(200);
+
+
const match2 = matchRedirectRule('/users/123/profile', rules);
+
expect(match2).toBeTruthy();
+
expect(match2?.targetPath).toBe('/index.html');
+
expect(match2?.status).toBe(200);
+
+
const match3 = matchRedirectRule('/', rules);
+
expect(match3).toBeTruthy();
+
expect(match3?.targetPath).toBe('/index.html');
+
});
+
});
+
+413
hosting-service/src/lib/redirects.ts
···
+
import { readFile } from 'fs/promises';
+
import { existsSync } from 'fs';
+
+
export interface RedirectRule {
+
from: string;
+
to: string;
+
status: number;
+
force: boolean;
+
conditions?: {
+
country?: string[];
+
language?: string[];
+
role?: string[];
+
cookie?: string[];
+
};
+
// For pattern matching
+
fromPattern?: RegExp;
+
fromParams?: string[]; // Named parameters from the pattern
+
queryParams?: Record<string, string>; // Expected query parameters
+
}
+
+
export interface RedirectMatch {
+
rule: RedirectRule;
+
targetPath: string;
+
status: number;
+
}
+
+
/**
+
* Parse a _redirects file into an array of redirect rules
+
*/
+
export function parseRedirectsFile(content: string): RedirectRule[] {
+
const lines = content.split('\n');
+
const rules: RedirectRule[] = [];
+
+
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
+
const lineRaw = lines[lineNum];
+
if (!lineRaw) continue;
+
+
const line = lineRaw.trim();
+
+
// Skip empty lines and comments
+
if (!line || line.startsWith('#')) {
+
continue;
+
}
+
+
try {
+
const rule = parseRedirectLine(line);
+
if (rule && rule.fromPattern) {
+
rules.push(rule);
+
}
+
} catch (err) {
+
console.warn(`Failed to parse redirect rule on line ${lineNum + 1}: ${line}`, err);
+
}
+
}
+
+
return rules;
+
}
+
+
/**
+
* Parse a single redirect rule line
+
* Format: /from [query_params] /to [status] [conditions]
+
*/
+
function parseRedirectLine(line: string): RedirectRule | null {
+
// Split by whitespace, but respect quoted strings (though not commonly used)
+
const parts = line.split(/\s+/);
+
+
if (parts.length < 2) {
+
return null;
+
}
+
+
let idx = 0;
+
const from = parts[idx++];
+
+
if (!from) {
+
return null;
+
}
+
+
let status = 301; // Default status
+
let force = false;
+
const conditions: NonNullable<RedirectRule['conditions']> = {};
+
const queryParams: Record<string, string> = {};
+
+
// Parse query parameters that come before the destination path
+
// They look like: key=:value (and don't start with /)
+
while (idx < parts.length) {
+
const part = parts[idx];
+
if (!part) {
+
idx++;
+
continue;
+
}
+
+
// If it starts with / or http, it's the destination path
+
if (part.startsWith('/') || part.startsWith('http://') || part.startsWith('https://')) {
+
break;
+
}
+
+
// If it contains = and comes before the destination, it's a query param
+
if (part.includes('=')) {
+
const splitIndex = part.indexOf('=');
+
const key = part.slice(0, splitIndex);
+
const value = part.slice(splitIndex + 1);
+
+
if (key && value) {
+
queryParams[key] = value;
+
}
+
idx++;
+
} else {
+
// Not a query param, must be destination or something else
+
break;
+
}
+
}
+
+
// Next part should be the destination
+
if (idx >= parts.length) {
+
return null;
+
}
+
+
const to = parts[idx++];
+
if (!to) {
+
return null;
+
}
+
+
// Parse remaining parts for status code and conditions
+
for (let i = idx; i < parts.length; i++) {
+
const part = parts[i];
+
+
if (!part) continue;
+
+
// Check for status code (with optional ! for force)
+
if (/^\d+!?$/.test(part)) {
+
if (part.endsWith('!')) {
+
force = true;
+
status = parseInt(part.slice(0, -1));
+
} else {
+
status = parseInt(part);
+
}
+
continue;
+
}
+
+
// Check for condition parameters (Country=, Language=, Role=, Cookie=)
+
if (part.includes('=')) {
+
const splitIndex = part.indexOf('=');
+
const key = part.slice(0, splitIndex);
+
const value = part.slice(splitIndex + 1);
+
+
if (!key || !value) continue;
+
+
const keyLower = key.toLowerCase();
+
+
if (keyLower === 'country') {
+
conditions.country = value.split(',').map(v => v.trim().toLowerCase());
+
} else if (keyLower === 'language') {
+
conditions.language = value.split(',').map(v => v.trim().toLowerCase());
+
} else if (keyLower === 'role') {
+
conditions.role = value.split(',').map(v => v.trim());
+
} else if (keyLower === 'cookie') {
+
conditions.cookie = value.split(',').map(v => v.trim().toLowerCase());
+
}
+
}
+
}
+
+
// Parse the 'from' pattern
+
const { pattern, params } = convertPathToRegex(from);
+
+
return {
+
from,
+
to,
+
status,
+
force,
+
conditions: Object.keys(conditions).length > 0 ? conditions : undefined,
+
queryParams: Object.keys(queryParams).length > 0 ? queryParams : undefined,
+
fromPattern: pattern,
+
fromParams: params,
+
};
+
}
+
+
/**
+
* Convert a path pattern with placeholders and splats to a regex
+
* Examples:
+
* /blog/:year/:month/:day -> captures year, month, day
+
* /news/* -> captures splat
+
*/
+
function convertPathToRegex(pattern: string): { pattern: RegExp; params: string[] } {
+
const params: string[] = [];
+
let regexStr = '^';
+
+
// Split by query string if present
+
const pathPart = pattern.split('?')[0] || pattern;
+
+
// Escape special regex characters except * and :
+
let escaped = pathPart.replace(/[.+^${}()|[\]\\]/g, '\\$&');
+
+
// Replace :param with named capture groups
+
escaped = escaped.replace(/:([a-zA-Z_][a-zA-Z0-9_]*)/g, (match, paramName) => {
+
params.push(paramName);
+
// Match path segment (everything except / and ?)
+
return '([^/?]+)';
+
});
+
+
// Replace * with splat capture (matches everything including /)
+
if (escaped.includes('*')) {
+
escaped = escaped.replace(/\*/g, '(.*)');
+
params.push('splat');
+
}
+
+
regexStr += escaped;
+
+
// Make trailing slash optional
+
if (!regexStr.endsWith('.*')) {
+
regexStr += '/?';
+
}
+
+
regexStr += '$';
+
+
return {
+
pattern: new RegExp(regexStr),
+
params,
+
};
+
}
+
+
/**
+
* Match a request path against redirect rules
+
*/
+
export function matchRedirectRule(
+
requestPath: string,
+
rules: RedirectRule[],
+
context?: {
+
queryParams?: Record<string, string>;
+
headers?: Record<string, string>;
+
cookies?: Record<string, string>;
+
}
+
): RedirectMatch | null {
+
// Normalize path: ensure leading slash, remove trailing slash (except for root)
+
let normalizedPath = requestPath.startsWith('/') ? requestPath : `/${requestPath}`;
+
+
for (const rule of rules) {
+
// Check query parameter conditions first (if any)
+
if (rule.queryParams) {
+
// If rule requires query params but none provided, skip this rule
+
if (!context?.queryParams) {
+
continue;
+
}
+
+
const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => {
+
const actualValue = context.queryParams?.[key];
+
return actualValue !== undefined;
+
});
+
+
if (!queryMatches) {
+
continue;
+
}
+
}
+
+
// Check conditional redirects (country, language, role, cookie)
+
if (rule.conditions) {
+
if (rule.conditions.country && context?.headers) {
+
const cfCountry = context.headers['cf-ipcountry'];
+
const xCountry = context.headers['x-country'];
+
const country = (cfCountry?.toLowerCase() || xCountry?.toLowerCase());
+
if (!country || !rule.conditions.country.includes(country)) {
+
continue;
+
}
+
}
+
+
if (rule.conditions.language && context?.headers) {
+
const acceptLang = context.headers['accept-language'];
+
if (!acceptLang) {
+
continue;
+
}
+
// Parse accept-language header (simplified)
+
const langs = acceptLang.split(',').map(l => {
+
const langPart = l.split(';')[0];
+
return langPart ? langPart.trim().toLowerCase() : '';
+
}).filter(l => l !== '');
+
const hasMatch = rule.conditions.language.some(lang =>
+
langs.some(l => l === lang || l.startsWith(lang + '-'))
+
);
+
if (!hasMatch) {
+
continue;
+
}
+
}
+
+
if (rule.conditions.cookie && context?.cookies) {
+
const hasCookie = rule.conditions.cookie.some(cookieName =>
+
context.cookies && cookieName in context.cookies
+
);
+
if (!hasCookie) {
+
continue;
+
}
+
}
+
+
// Role-based redirects would need JWT verification - skip for now
+
if (rule.conditions.role) {
+
continue;
+
}
+
}
+
+
// Match the path pattern
+
const match = rule.fromPattern?.exec(normalizedPath);
+
if (!match) {
+
continue;
+
}
+
+
// Build the target path by replacing placeholders
+
let targetPath = rule.to;
+
+
// Replace captured parameters
+
if (rule.fromParams && match.length > 1) {
+
for (let i = 0; i < rule.fromParams.length; i++) {
+
const paramName = rule.fromParams[i];
+
const paramValue = match[i + 1];
+
+
if (!paramName || !paramValue) continue;
+
+
if (paramName === 'splat') {
+
targetPath = targetPath.replace(':splat', paramValue);
+
} else {
+
targetPath = targetPath.replace(`:${paramName}`, paramValue);
+
}
+
}
+
}
+
+
// Handle query parameter replacements
+
if (rule.queryParams && context?.queryParams) {
+
for (const [key, placeholder] of Object.entries(rule.queryParams)) {
+
const actualValue = context.queryParams[key];
+
if (actualValue && placeholder && placeholder.startsWith(':')) {
+
const paramName = placeholder.slice(1);
+
if (paramName) {
+
targetPath = targetPath.replace(`:${paramName}`, actualValue);
+
}
+
}
+
}
+
}
+
+
// Preserve query string for 200, 301, 302 redirects (unless target already has one)
+
if ([200, 301, 302].includes(rule.status) && context?.queryParams && !targetPath.includes('?')) {
+
const queryString = Object.entries(context.queryParams)
+
.map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`)
+
.join('&');
+
if (queryString) {
+
targetPath += `?${queryString}`;
+
}
+
}
+
+
return {
+
rule,
+
targetPath,
+
status: rule.status,
+
};
+
}
+
+
return null;
+
}
+
+
/**
+
* Load redirect rules from a cached site
+
*/
+
export async function loadRedirectRules(did: string, rkey: string): Promise<RedirectRule[]> {
+
const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
+
const redirectsPath = `${CACHE_DIR}/${did}/${rkey}/_redirects`;
+
+
if (!existsSync(redirectsPath)) {
+
return [];
+
}
+
+
try {
+
const content = await readFile(redirectsPath, 'utf-8');
+
return parseRedirectsFile(content);
+
} catch (err) {
+
console.error('Failed to load _redirects file', err);
+
return [];
+
}
+
}
+
+
/**
+
* Parse cookies from Cookie header
+
*/
+
export function parseCookies(cookieHeader?: string): Record<string, string> {
+
if (!cookieHeader) return {};
+
+
const cookies: Record<string, string> = {};
+
const parts = cookieHeader.split(';');
+
+
for (const part of parts) {
+
const [key, ...valueParts] = part.split('=');
+
if (key && valueParts.length > 0) {
+
cookies[key.trim()] = valueParts.join('=').trim();
+
}
+
}
+
+
return cookies;
+
}
+
+
/**
+
* Parse query string into object
+
*/
+
export function parseQueryString(url: string): Record<string, string> {
+
const queryStart = url.indexOf('?');
+
if (queryStart === -1) return {};
+
+
const queryString = url.slice(queryStart + 1);
+
const params: Record<string, string> = {};
+
+
for (const pair of queryString.split('&')) {
+
const [key, value] = pair.split('=');
+
if (key) {
+
params[decodeURIComponent(key)] = value ? decodeURIComponent(value) : '';
+
}
+
}
+
+
return params;
+
}
+
+168 -6
hosting-service/src/server.ts
···
import { lookup } from 'mime-types';
import { logger, observabilityMiddleware, observabilityErrorHandler, logCollector, errorTracker, metricsCollector } from './lib/observability';
import { fileCache, metadataCache, rewrittenHtmlCache, getCacheKey, type FileMetadata } from './lib/cache';
+
import { loadRedirectRules, matchRedirectRule, parseCookies, parseQueryString, type RedirectRule } from './lib/redirects';
const BASE_HOST = process.env.BASE_HOST || 'wisp.place';
···
}
}
+
// Cache for redirect rules (per site)
+
const redirectRulesCache = new Map<string, RedirectRule[]>();
+
+
/**
+
* Clear redirect rules cache for a specific site
+
* Should be called when a site is updated/recached
+
*/
+
export function clearRedirectRulesCache(did: string, rkey: string) {
+
const cacheKey = `${did}:${rkey}`;
+
redirectRulesCache.delete(cacheKey);
+
}
+
// Helper to serve files from cache
-
async function serveFromCache(did: string, rkey: string, filePath: string) {
+
async function serveFromCache(
+
did: string,
+
rkey: string,
+
filePath: string,
+
fullUrl?: string,
+
headers?: Record<string, string>
+
) {
+
// Check for redirect rules first
+
const redirectCacheKey = `${did}:${rkey}`;
+
let redirectRules = redirectRulesCache.get(redirectCacheKey);
+
+
if (redirectRules === undefined) {
+
// Load rules for the first time
+
redirectRules = await loadRedirectRules(did, rkey);
+
redirectRulesCache.set(redirectCacheKey, redirectRules);
+
}
+
+
// Apply redirect rules if any exist
+
if (redirectRules.length > 0) {
+
const requestPath = '/' + (filePath || '');
+
const queryParams = fullUrl ? parseQueryString(fullUrl) : {};
+
const cookies = parseCookies(headers?.['cookie']);
+
+
const redirectMatch = matchRedirectRule(requestPath, redirectRules, {
+
queryParams,
+
headers,
+
cookies,
+
});
+
+
if (redirectMatch) {
+
const { targetPath, status } = redirectMatch;
+
+
// Handle different status codes
+
if (status === 200) {
+
// Rewrite: serve different content but keep URL the same
+
// Remove leading slash for internal path resolution
+
const rewritePath = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath;
+
return serveFileInternal(did, rkey, rewritePath);
+
} else if (status === 301 || status === 302) {
+
// External redirect: change the URL
+
return new Response(null, {
+
status,
+
headers: {
+
'Location': targetPath,
+
'Cache-Control': status === 301 ? 'public, max-age=31536000' : 'public, max-age=0',
+
},
+
});
+
} else if (status === 404) {
+
// Custom 404 page
+
const custom404Path = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath;
+
const response = await serveFileInternal(did, rkey, custom404Path);
+
// Override status to 404
+
return new Response(response.body, {
+
status: 404,
+
headers: response.headers,
+
});
+
}
+
}
+
}
+
+
// No redirect matched, serve normally
+
return serveFileInternal(did, rkey, filePath);
+
}
+
+
// Internal function to serve a file (used by both normal serving and rewrites)
+
async function serveFileInternal(did: string, rkey: string, filePath: string) {
// Default to index.html if path is empty or ends with /
let requestPath = filePath || 'index.html';
if (requestPath.endsWith('/')) {
···
did: string,
rkey: string,
filePath: string,
-
basePath: string
+
basePath: string,
+
fullUrl?: string,
+
headers?: Record<string, string>
) {
+
// Check for redirect rules first
+
const redirectCacheKey = `${did}:${rkey}`;
+
let redirectRules = redirectRulesCache.get(redirectCacheKey);
+
+
if (redirectRules === undefined) {
+
// Load rules for the first time
+
redirectRules = await loadRedirectRules(did, rkey);
+
redirectRulesCache.set(redirectCacheKey, redirectRules);
+
}
+
+
// Apply redirect rules if any exist
+
if (redirectRules.length > 0) {
+
const requestPath = '/' + (filePath || '');
+
const queryParams = fullUrl ? parseQueryString(fullUrl) : {};
+
const cookies = parseCookies(headers?.['cookie']);
+
+
const redirectMatch = matchRedirectRule(requestPath, redirectRules, {
+
queryParams,
+
headers,
+
cookies,
+
});
+
+
if (redirectMatch) {
+
const { targetPath, status } = redirectMatch;
+
+
// Handle different status codes
+
if (status === 200) {
+
// Rewrite: serve different content but keep URL the same
+
const rewritePath = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath;
+
return serveFileInternalWithRewrite(did, rkey, rewritePath, basePath);
+
} else if (status === 301 || status === 302) {
+
// External redirect: change the URL
+
// For sites.wisp.place, we need to adjust the target path to include the base path
+
// unless it's an absolute URL
+
let redirectTarget = targetPath;
+
if (!targetPath.startsWith('http://') && !targetPath.startsWith('https://')) {
+
redirectTarget = basePath + (targetPath.startsWith('/') ? targetPath.slice(1) : targetPath);
+
}
+
return new Response(null, {
+
status,
+
headers: {
+
'Location': redirectTarget,
+
'Cache-Control': status === 301 ? 'public, max-age=31536000' : 'public, max-age=0',
+
},
+
});
+
} else if (status === 404) {
+
// Custom 404 page
+
const custom404Path = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath;
+
const response = await serveFileInternalWithRewrite(did, rkey, custom404Path, basePath);
+
// Override status to 404
+
return new Response(response.body, {
+
status: 404,
+
headers: response.headers,
+
});
+
}
+
}
+
}
+
+
// No redirect matched, serve normally
+
return serveFileInternalWithRewrite(did, rkey, filePath, basePath);
+
}
+
+
// Internal function to serve a file with rewriting
+
async function serveFileInternalWithRewrite(did: string, rkey: string, filePath: string, basePath: string) {
// Default to index.html if path is empty or ends with /
let requestPath = filePath || 'index.html';
if (requestPath.endsWith('/')) {
···
try {
await downloadAndCacheSite(did, rkey, siteData.record, pdsEndpoint, siteData.cid);
+
// Clear redirect rules cache since the site was updated
+
clearRedirectRulesCache(did, rkey);
logger.info('Site cached successfully', { did, rkey });
return true;
} catch (err) {
···
// Serve with HTML path rewriting to handle absolute paths
const basePath = `/${identifier}/${site}/`;
-
return serveFromCacheWithRewrite(did, site, filePath, basePath);
+
const headers: Record<string, string> = {};
+
c.req.raw.headers.forEach((value, key) => {
+
headers[key.toLowerCase()] = value;
+
});
+
return serveFromCacheWithRewrite(did, site, filePath, basePath, c.req.url, headers);
}
// Check if this is a DNS hash subdomain
···
return c.text('Site not found', 404);
}
-
return serveFromCache(customDomain.did, rkey, path);
+
const headers: Record<string, string> = {};
+
c.req.raw.headers.forEach((value, key) => {
+
headers[key.toLowerCase()] = value;
+
});
+
return serveFromCache(customDomain.did, rkey, path, c.req.url, headers);
}
// Route 2: Registered subdomains - /*.wisp.place/*
···
return c.text('Site not found', 404);
}
-
return serveFromCache(domainInfo.did, rkey, path);
+
const headers: Record<string, string> = {};
+
c.req.raw.headers.forEach((value, key) => {
+
headers[key.toLowerCase()] = value;
+
});
+
return serveFromCache(domainInfo.did, rkey, path, c.req.url, headers);
}
// Route 1: Custom domains - /*
···
return c.text('Site not found', 404);
}
-
return serveFromCache(customDomain.did, rkey, path);
+
const headers: Record<string, string> = {};
+
c.req.raw.headers.forEach((value, key) => {
+
headers[key.toLowerCase()] = value;
+
});
+
return serveFromCache(customDomain.did, rkey, path, c.req.url, headers);
});
// Internal observability endpoints (for admin panel)
+1
cli/.gitignore
···
+
test/
.DS_STORE
jacquard/
binaries/
+66
cli/src/cid.rs
···
+
use jacquard_common::types::cid::IpldCid;
+
use sha2::{Digest, Sha256};
+
+
/// Compute CID (Content Identifier) for blob content
+
/// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256
+
///
+
/// CRITICAL: This must be called on the BASE64-ENCODED form of the gzipped content
+
///
+
/// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation
+
pub fn compute_cid(content: &[u8]) -> String {
+
// Compute the SHA-256 digest of the content (matches AT Protocol's Node.js implementation)
+
let hash = Sha256::digest(content);
+
+
// Create multihash (code 0x12 = sha2-256)
+
let multihash = multihash::Multihash::wrap(0x12, &hash)
+
.expect("SHA-256 hash should always fit in multihash");
+
+
// Create CIDv1 with raw codec (0x55)
+
let cid = IpldCid::new_v1(0x55, multihash);
+
+
// Convert to base32 string representation
+
cid.to_string_of_base(multibase::Base::Base32Lower)
+
.unwrap_or_else(|_| cid.to_string())
+
}
+
+
#[cfg(test)]
+
mod tests {
+
use super::*;
+
use base64::Engine;
+
+
#[test]
+
fn test_compute_cid() {
+
// Test with a simple string: "hello"
+
let content = b"hello";
+
let cid = compute_cid(content);
+
+
// CID should start with 'baf' for raw codec base32
+
assert!(cid.starts_with("baf"));
+
}
+
+
#[test]
+
fn test_compute_cid_base64_encoded() {
+
// Simulate the actual use case: gzipped then base64 encoded
+
use flate2::write::GzEncoder;
+
use flate2::Compression;
+
use std::io::Write;
+
+
let original = b"hello world";
+
+
// Gzip compress
+
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+
encoder.write_all(original).unwrap();
+
let gzipped = encoder.finish().unwrap();
+
+
// Base64 encode the gzipped data
+
let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes();
+
+
// Compute CID on the base64 bytes
+
let cid = compute_cid(&base64_bytes);
+
+
// Should be a valid CID
+
assert!(cid.starts_with("baf"));
+
assert!(cid.len() > 10);
+
}
+
}
+
+71
cli/src/download.rs
···
+
use base64::Engine;
+
use bytes::Bytes;
+
use flate2::read::GzDecoder;
+
use jacquard_common::types::blob::BlobRef;
+
use miette::IntoDiagnostic;
+
use std::io::Read;
+
use url::Url;
+
+
/// Download a blob from the PDS
+
pub async fn download_blob(pds_url: &Url, blob_ref: &BlobRef<'_>, did: &str) -> miette::Result<Bytes> {
+
// Extract CID from blob ref
+
let cid = blob_ref.blob().r#ref.to_string();
+
+
// Construct blob download URL
+
// The correct endpoint is: /xrpc/com.atproto.sync.getBlob?did={did}&cid={cid}
+
let blob_url = pds_url
+
.join(&format!("/xrpc/com.atproto.sync.getBlob?did={}&cid={}", did, cid))
+
.into_diagnostic()?;
+
+
let client = reqwest::Client::new();
+
let response = client
+
.get(blob_url)
+
.send()
+
.await
+
.into_diagnostic()?;
+
+
if !response.status().is_success() {
+
return Err(miette::miette!(
+
"Failed to download blob: {}",
+
response.status()
+
));
+
}
+
+
let bytes = response.bytes().await.into_diagnostic()?;
+
Ok(bytes)
+
}
+
+
/// Decode and decompress a blob: base64-decode first (if needed), then gunzip (if needed)
+
pub fn decompress_blob(data: &[u8], is_base64: bool, is_gzipped: bool) -> miette::Result<Vec<u8>> {
+
let mut current_data = data.to_vec();
+
+
// First, decode base64 if needed
+
if is_base64 {
+
current_data = base64::prelude::BASE64_STANDARD
+
.decode(&current_data)
+
.into_diagnostic()?;
+
}
+
+
// Then, decompress gzip if needed
+
if is_gzipped {
+
let mut decoder = GzDecoder::new(&current_data[..]);
+
let mut decompressed = Vec::new();
+
decoder.read_to_end(&mut decompressed).into_diagnostic()?;
+
current_data = decompressed;
+
}
+
+
Ok(current_data)
+
}
+
+
/// Download and decompress a blob
+
pub async fn download_and_decompress_blob(
+
pds_url: &Url,
+
blob_ref: &BlobRef<'_>,
+
did: &str,
+
is_base64: bool,
+
is_gzipped: bool,
+
) -> miette::Result<Vec<u8>> {
+
let data = download_blob(pds_url, blob_ref, did).await?;
+
decompress_blob(&data, is_base64, is_gzipped)
+
}
+
+109 -16
cli/src/main.rs
···
mod place_wisp;
mod cid;
mod blob_map;
+
mod metadata;
+
mod download;
+
mod pull;
+
mod serve;
-
use clap::Parser;
+
use clap::{Parser, Subcommand};
use jacquard::CowStr;
use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession};
use jacquard::oauth::client::OAuthClient;
···
use place_wisp::fs::*;
#[derive(Parser, Debug)]
-
#[command(author, version, about = "Deploy a static site to wisp.place")]
+
#[command(author, version, about = "wisp.place CLI tool")]
struct Args {
+
#[command(subcommand)]
+
command: Option<Commands>,
+
+
// Deploy arguments (when no subcommand is specified)
/// Handle (e.g., alice.bsky.social), DID, or PDS URL
-
input: CowStr<'static>,
+
#[arg(global = true, conflicts_with = "command")]
+
input: Option<CowStr<'static>>,
/// Path to the directory containing your static site
-
#[arg(short, long, default_value = ".")]
-
path: PathBuf,
+
#[arg(short, long, global = true, conflicts_with = "command")]
+
path: Option<PathBuf>,
/// Site name (defaults to directory name)
-
#[arg(short, long)]
+
#[arg(short, long, global = true, conflicts_with = "command")]
site: Option<String>,
-
/// Path to auth store file (will be created if missing, only used with OAuth)
-
#[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
-
store: String,
+
/// Path to auth store file
+
#[arg(long, global = true, conflicts_with = "command")]
+
store: Option<String>,
-
/// App Password for authentication (alternative to OAuth)
-
#[arg(long)]
+
/// App Password for authentication
+
#[arg(long, global = true, conflicts_with = "command")]
password: Option<CowStr<'static>>,
}
+
#[derive(Subcommand, Debug)]
+
enum Commands {
+
/// Deploy a static site to wisp.place (default command)
+
Deploy {
+
/// Handle (e.g., alice.bsky.social), DID, or PDS URL
+
input: CowStr<'static>,
+
+
/// Path to the directory containing your static site
+
#[arg(short, long, default_value = ".")]
+
path: PathBuf,
+
+
/// Site name (defaults to directory name)
+
#[arg(short, long)]
+
site: Option<String>,
+
+
/// Path to auth store file (will be created if missing, only used with OAuth)
+
#[arg(long, default_value = "/tmp/wisp-oauth-session.json")]
+
store: String,
+
+
/// App Password for authentication (alternative to OAuth)
+
#[arg(long)]
+
password: Option<CowStr<'static>>,
+
},
+
/// Pull a site from the PDS to a local directory
+
Pull {
+
/// Handle (e.g., alice.bsky.social) or DID
+
input: CowStr<'static>,
+
+
/// Site name (record key)
+
#[arg(short, long)]
+
site: String,
+
+
/// Output directory for the downloaded site
+
#[arg(short, long, default_value = ".")]
+
output: PathBuf,
+
},
+
/// Serve a site locally with real-time firehose updates
+
Serve {
+
/// Handle (e.g., alice.bsky.social) or DID
+
input: CowStr<'static>,
+
+
/// Site name (record key)
+
#[arg(short, long)]
+
site: String,
+
+
/// Output directory for the site files
+
#[arg(short, long, default_value = ".")]
+
output: PathBuf,
+
+
/// Port to serve on
+
#[arg(short, long, default_value = "8080")]
+
port: u16,
+
},
+
}
+
#[tokio::main]
async fn main() -> miette::Result<()> {
let args = Args::parse();
-
// Dispatch to appropriate authentication method
-
if let Some(password) = args.password {
-
run_with_app_password(args.input, password, args.path, args.site).await
-
} else {
-
run_with_oauth(args.input, args.store, args.path, args.site).await
+
match args.command {
+
Some(Commands::Deploy { input, path, site, store, password }) => {
+
// Dispatch to appropriate authentication method
+
if let Some(password) = password {
+
run_with_app_password(input, password, path, site).await
+
} else {
+
run_with_oauth(input, store, path, site).await
+
}
+
}
+
Some(Commands::Pull { input, site, output }) => {
+
pull::pull_site(input, CowStr::from(site), output).await
+
}
+
Some(Commands::Serve { input, site, output, port }) => {
+
serve::serve_site(input, CowStr::from(site), output, port).await
+
}
+
None => {
+
// Legacy mode: if input is provided, assume deploy command
+
if let Some(input) = args.input {
+
let path = args.path.unwrap_or_else(|| PathBuf::from("."));
+
let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string());
+
+
// Dispatch to appropriate authentication method
+
if let Some(password) = args.password {
+
run_with_app_password(input, password, path, args.site).await
+
} else {
+
run_with_oauth(input, store, path, args.site).await
+
}
+
} else {
+
// No command and no input, show help
+
use clap::CommandFactory;
+
Args::command().print_help().into_diagnostic()?;
+
Ok(())
+
}
+
}
}
}
+46
cli/src/metadata.rs
···
+
use serde::{Deserialize, Serialize};
+
use std::collections::HashMap;
+
use std::path::Path;
+
use miette::IntoDiagnostic;
+
+
/// Metadata tracking file CIDs for incremental updates
+
#[derive(Debug, Clone, Serialize, Deserialize)]
+
pub struct SiteMetadata {
+
/// Record CID from the PDS
+
pub record_cid: String,
+
/// Map of file paths to their blob CIDs
+
pub file_cids: HashMap<String, String>,
+
/// Timestamp when the site was last synced
+
pub last_sync: i64,
+
}
+
+
impl SiteMetadata {
+
pub fn new(record_cid: String, file_cids: HashMap<String, String>) -> Self {
+
Self {
+
record_cid,
+
file_cids,
+
last_sync: chrono::Utc::now().timestamp(),
+
}
+
}
+
+
/// Load metadata from a directory
+
pub fn load(dir: &Path) -> miette::Result<Option<Self>> {
+
let metadata_path = dir.join(".wisp-metadata.json");
+
if !metadata_path.exists() {
+
return Ok(None);
+
}
+
+
let contents = std::fs::read_to_string(&metadata_path).into_diagnostic()?;
+
let metadata: SiteMetadata = serde_json::from_str(&contents).into_diagnostic()?;
+
Ok(Some(metadata))
+
}
+
+
/// Save metadata to a directory
+
pub fn save(&self, dir: &Path) -> miette::Result<()> {
+
let metadata_path = dir.join(".wisp-metadata.json");
+
let contents = serde_json::to_string_pretty(self).into_diagnostic()?;
+
std::fs::write(&metadata_path, contents).into_diagnostic()?;
+
Ok(())
+
}
+
}
+
+305
cli/src/pull.rs
···
+
use crate::blob_map;
+
use crate::download;
+
use crate::metadata::SiteMetadata;
+
use crate::place_wisp::fs::*;
+
use jacquard::CowStr;
+
use jacquard::prelude::IdentityResolver;
+
use jacquard_common::types::string::Did;
+
use jacquard_common::xrpc::XrpcExt;
+
use jacquard_identity::PublicResolver;
+
use miette::IntoDiagnostic;
+
use std::collections::HashMap;
+
use std::path::{Path, PathBuf};
+
use url::Url;
+
+
/// Pull a site from the PDS to a local directory
+
pub async fn pull_site(
+
input: CowStr<'static>,
+
rkey: CowStr<'static>,
+
output_dir: PathBuf,
+
) -> miette::Result<()> {
+
println!("Pulling site {} from {}...", rkey, input);
+
+
// Resolve handle to DID if needed
+
let resolver = PublicResolver::default();
+
let did = if input.starts_with("did:") {
+
Did::new(&input).into_diagnostic()?
+
} else {
+
// It's a handle, resolve it
+
let handle = jacquard_common::types::string::Handle::new(&input).into_diagnostic()?;
+
resolver.resolve_handle(&handle).await.into_diagnostic()?
+
};
+
+
// Resolve PDS endpoint for the DID
+
let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?;
+
println!("Resolved PDS: {}", pds_url);
+
+
// Fetch the place.wisp.fs record
+
+
println!("Fetching record from PDS...");
+
let client = reqwest::Client::new();
+
+
// Use com.atproto.repo.getRecord
+
use jacquard::api::com_atproto::repo::get_record::GetRecord;
+
use jacquard_common::types::string::Rkey as RkeyType;
+
let rkey_parsed = RkeyType::new(&rkey).into_diagnostic()?;
+
+
use jacquard_common::types::ident::AtIdentifier;
+
use jacquard_common::types::string::RecordKey;
+
let request = GetRecord::new()
+
.repo(AtIdentifier::Did(did.clone()))
+
.collection(CowStr::from("place.wisp.fs"))
+
.rkey(RecordKey::from(rkey_parsed))
+
.build();
+
+
let response = client
+
.xrpc(pds_url.clone())
+
.send(&request)
+
.await
+
.into_diagnostic()?;
+
+
let record_output = response.into_output().into_diagnostic()?;
+
let record_cid = record_output.cid.as_ref().map(|c| c.to_string()).unwrap_or_default();
+
+
// Parse the record value as Fs
+
use jacquard_common::types::value::from_data;
+
let fs_record: Fs = from_data(&record_output.value).into_diagnostic()?;
+
+
let file_count = fs_record.file_count.map(|c| c.to_string()).unwrap_or_else(|| "?".to_string());
+
println!("Found site '{}' with {} files", fs_record.site, file_count);
+
+
// Load existing metadata for incremental updates
+
let existing_metadata = SiteMetadata::load(&output_dir)?;
+
let existing_file_cids = existing_metadata
+
.as_ref()
+
.map(|m| m.file_cids.clone())
+
.unwrap_or_default();
+
+
// Extract blob map from the new manifest
+
let new_blob_map = blob_map::extract_blob_map(&fs_record.root);
+
let new_file_cids: HashMap<String, String> = new_blob_map
+
.iter()
+
.map(|(path, (_blob_ref, cid))| (path.clone(), cid.clone()))
+
.collect();
+
+
// Clean up any leftover temp directories from previous failed attempts
+
let parent = output_dir.parent().unwrap_or_else(|| std::path::Path::new("."));
+
let output_name = output_dir.file_name().unwrap_or_else(|| std::ffi::OsStr::new("site")).to_string_lossy();
+
let temp_prefix = format!(".tmp-{}-", output_name);
+
+
if let Ok(entries) = parent.read_dir() {
+
for entry in entries.flatten() {
+
let name = entry.file_name();
+
if name.to_string_lossy().starts_with(&temp_prefix) {
+
let _ = std::fs::remove_dir_all(entry.path());
+
}
+
}
+
}
+
+
// Check if we need to update (but only if output directory actually exists with files)
+
if let Some(metadata) = &existing_metadata {
+
if metadata.record_cid == record_cid {
+
// Verify that the output directory actually exists and has content
+
let has_content = output_dir.exists() &&
+
output_dir.read_dir()
+
.map(|mut entries| entries.any(|e| {
+
if let Ok(entry) = e {
+
!entry.file_name().to_string_lossy().starts_with(".wisp-metadata")
+
} else {
+
false
+
}
+
}))
+
.unwrap_or(false);
+
+
if has_content {
+
println!("Site is already up to date!");
+
return Ok(());
+
}
+
}
+
}
+
+
// Create temporary directory for atomic update
+
// Place temp dir in parent directory to avoid issues with non-existent output_dir
+
let parent = output_dir.parent().unwrap_or_else(|| std::path::Path::new("."));
+
let temp_dir_name = format!(
+
".tmp-{}-{}",
+
output_dir.file_name().unwrap_or_else(|| std::ffi::OsStr::new("site")).to_string_lossy(),
+
chrono::Utc::now().timestamp()
+
);
+
let temp_dir = parent.join(temp_dir_name);
+
std::fs::create_dir_all(&temp_dir).into_diagnostic()?;
+
+
println!("Downloading files...");
+
let mut downloaded = 0;
+
let mut reused = 0;
+
+
// Download files recursively
+
let download_result = download_directory(
+
&fs_record.root,
+
&temp_dir,
+
&pds_url,
+
did.as_str(),
+
&new_blob_map,
+
&existing_file_cids,
+
&output_dir,
+
String::new(),
+
&mut downloaded,
+
&mut reused,
+
)
+
.await;
+
+
// If download failed, clean up temp directory
+
if let Err(e) = download_result {
+
let _ = std::fs::remove_dir_all(&temp_dir);
+
return Err(e);
+
}
+
+
println!(
+
"Downloaded {} files, reused {} files",
+
downloaded, reused
+
);
+
+
// Save metadata
+
let metadata = SiteMetadata::new(record_cid, new_file_cids);
+
metadata.save(&temp_dir)?;
+
+
// Move files from temp to output directory
+
let output_abs = std::fs::canonicalize(&output_dir).unwrap_or_else(|_| output_dir.clone());
+
let current_dir = std::env::current_dir().into_diagnostic()?;
+
+
// Special handling for pulling to current directory
+
if output_abs == current_dir {
+
// Move files from temp to current directory
+
for entry in std::fs::read_dir(&temp_dir).into_diagnostic()? {
+
let entry = entry.into_diagnostic()?;
+
let dest = current_dir.join(entry.file_name());
+
+
// Remove existing file/dir if it exists
+
if dest.exists() {
+
if dest.is_dir() {
+
std::fs::remove_dir_all(&dest).into_diagnostic()?;
+
} else {
+
std::fs::remove_file(&dest).into_diagnostic()?;
+
}
+
}
+
+
// Move from temp to current dir
+
std::fs::rename(entry.path(), dest).into_diagnostic()?;
+
}
+
+
// Clean up temp directory
+
std::fs::remove_dir_all(&temp_dir).into_diagnostic()?;
+
} else {
+
// If output directory exists and has content, remove it first
+
if output_dir.exists() {
+
std::fs::remove_dir_all(&output_dir).into_diagnostic()?;
+
}
+
+
// Ensure parent directory exists
+
if let Some(parent) = output_dir.parent() {
+
if !parent.as_os_str().is_empty() && !parent.exists() {
+
std::fs::create_dir_all(parent).into_diagnostic()?;
+
}
+
}
+
+
// Rename temp to final location
+
match std::fs::rename(&temp_dir, &output_dir) {
+
Ok(_) => {},
+
Err(e) => {
+
// Clean up temp directory on failure
+
let _ = std::fs::remove_dir_all(&temp_dir);
+
return Err(miette::miette!("Failed to move temp directory: {}", e));
+
}
+
}
+
}
+
+
println!("✓ Site pulled successfully to {}", output_dir.display());
+
+
Ok(())
+
}
+
+
/// Recursively download a directory
+
fn download_directory<'a>(
+
dir: &'a Directory<'_>,
+
output_dir: &'a Path,
+
pds_url: &'a Url,
+
did: &'a str,
+
new_blob_map: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>,
+
existing_file_cids: &'a HashMap<String, String>,
+
existing_output_dir: &'a Path,
+
path_prefix: String,
+
downloaded: &'a mut usize,
+
reused: &'a mut usize,
+
) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<()>> + Send + 'a>> {
+
Box::pin(async move {
+
for entry in &dir.entries {
+
let entry_name = entry.name.as_str();
+
let current_path = if path_prefix.is_empty() {
+
entry_name.to_string()
+
} else {
+
format!("{}/{}", path_prefix, entry_name)
+
};
+
+
match &entry.node {
+
EntryNode::File(file) => {
+
let output_path = output_dir.join(entry_name);
+
+
// Check if file CID matches existing
+
if let Some((_blob_ref, new_cid)) = new_blob_map.get(&current_path) {
+
if let Some(existing_cid) = existing_file_cids.get(&current_path) {
+
if existing_cid == new_cid {
+
// File unchanged, copy from existing directory
+
let existing_path = existing_output_dir.join(&current_path);
+
if existing_path.exists() {
+
std::fs::copy(&existing_path, &output_path).into_diagnostic()?;
+
*reused += 1;
+
println!(" ✓ Reused {}", current_path);
+
continue;
+
}
+
}
+
}
+
}
+
+
// File is new or changed, download it
+
println!(" ↓ Downloading {}", current_path);
+
let data = download::download_and_decompress_blob(
+
pds_url,
+
&file.blob,
+
did,
+
file.base64.unwrap_or(false),
+
file.encoding.as_ref().map(|e| e.as_str() == "gzip").unwrap_or(false),
+
)
+
.await?;
+
+
std::fs::write(&output_path, data).into_diagnostic()?;
+
*downloaded += 1;
+
}
+
EntryNode::Directory(subdir) => {
+
let subdir_path = output_dir.join(entry_name);
+
std::fs::create_dir_all(&subdir_path).into_diagnostic()?;
+
+
download_directory(
+
subdir,
+
&subdir_path,
+
pds_url,
+
did,
+
new_blob_map,
+
existing_file_cids,
+
existing_output_dir,
+
current_path,
+
downloaded,
+
reused,
+
)
+
.await?;
+
}
+
EntryNode::Unknown(_) => {
+
// Skip unknown node types
+
println!(" ⚠ Skipping unknown node type for {}", current_path);
+
}
+
}
+
}
+
+
Ok(())
+
})
+
}
+
+202
cli/src/serve.rs
···
+
use crate::pull::pull_site;
+
use axum::Router;
+
use jacquard::CowStr;
+
use jacquard_common::jetstream::{CommitOperation, JetstreamMessage, JetstreamParams};
+
use jacquard_common::types::string::Did;
+
use jacquard_common::xrpc::{SubscriptionClient, TungsteniteSubscriptionClient};
+
use miette::IntoDiagnostic;
+
use n0_future::StreamExt;
+
use std::path::PathBuf;
+
use std::sync::Arc;
+
use tokio::sync::RwLock;
+
use tower_http::compression::CompressionLayer;
+
use tower_http::services::ServeDir;
+
use url::Url;
+
+
/// Shared state for the server
+
#[derive(Clone)]
+
struct ServerState {
+
did: CowStr<'static>,
+
rkey: CowStr<'static>,
+
output_dir: PathBuf,
+
last_cid: Arc<RwLock<Option<String>>>,
+
}
+
+
/// Serve a site locally with real-time firehose updates
+
pub async fn serve_site(
+
input: CowStr<'static>,
+
rkey: CowStr<'static>,
+
output_dir: PathBuf,
+
port: u16,
+
) -> miette::Result<()> {
+
println!("Serving site {} from {} on port {}...", rkey, input, port);
+
+
// Resolve handle to DID if needed
+
use jacquard_identity::PublicResolver;
+
use jacquard::prelude::IdentityResolver;
+
+
let resolver = PublicResolver::default();
+
let did = if input.starts_with("did:") {
+
Did::new(&input).into_diagnostic()?
+
} else {
+
// It's a handle, resolve it
+
let handle = jacquard_common::types::string::Handle::new(&input).into_diagnostic()?;
+
resolver.resolve_handle(&handle).await.into_diagnostic()?
+
};
+
+
println!("Resolved to DID: {}", did.as_str());
+
+
// Create output directory if it doesn't exist
+
std::fs::create_dir_all(&output_dir).into_diagnostic()?;
+
+
// Initial pull of the site
+
println!("Performing initial pull...");
+
let did_str = CowStr::from(did.as_str().to_string());
+
pull_site(did_str.clone(), rkey.clone(), output_dir.clone()).await?;
+
+
// Create shared state
+
let state = ServerState {
+
did: did_str.clone(),
+
rkey: rkey.clone(),
+
output_dir: output_dir.clone(),
+
last_cid: Arc::new(RwLock::new(None)),
+
};
+
+
// Start firehose listener in background
+
let firehose_state = state.clone();
+
tokio::spawn(async move {
+
if let Err(e) = watch_firehose(firehose_state).await {
+
eprintln!("Firehose error: {}", e);
+
}
+
});
+
+
// Create HTTP server with gzip compression
+
let app = Router::new()
+
.fallback_service(
+
ServeDir::new(&output_dir)
+
.precompressed_gzip()
+
)
+
.layer(CompressionLayer::new())
+
.with_state(state);
+
+
let addr = format!("0.0.0.0:{}", port);
+
let listener = tokio::net::TcpListener::bind(&addr)
+
.await
+
.into_diagnostic()?;
+
+
println!("\n✓ Server running at http://localhost:{}", port);
+
println!(" Watching for updates on the firehose...\n");
+
+
axum::serve(listener, app).await.into_diagnostic()?;
+
+
Ok(())
+
}
+
+
/// Watch the firehose for updates to the specific site
+
fn watch_firehose(state: ServerState) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<()>> + Send>> {
+
Box::pin(async move {
+
let jetstream_url = Url::parse("wss://jetstream1.us-east.fire.hose.cam")
+
.into_diagnostic()?;
+
+
println!("[Firehose] Connecting to Jetstream...");
+
+
// Create subscription client
+
let client = TungsteniteSubscriptionClient::from_base_uri(jetstream_url);
+
+
// Subscribe with no filters (we'll filter manually)
+
// Jetstream doesn't support filtering by collection in the params builder
+
let params = JetstreamParams::new().build();
+
+
let stream = client.subscribe(&params).await.into_diagnostic()?;
+
println!("[Firehose] Connected! Watching for updates...");
+
+
// Convert to typed message stream
+
let (_sink, mut messages) = stream.into_stream();
+
+
loop {
+
match messages.next().await {
+
Some(Ok(msg)) => {
+
if let Err(e) = handle_firehose_message(&state, msg).await {
+
eprintln!("[Firehose] Error handling message: {}", e);
+
}
+
}
+
Some(Err(e)) => {
+
eprintln!("[Firehose] Stream error: {}", e);
+
// Try to reconnect after a delay
+
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
+
return Box::pin(watch_firehose(state)).await;
+
}
+
None => {
+
println!("[Firehose] Stream ended, reconnecting...");
+
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
+
return Box::pin(watch_firehose(state)).await;
+
}
+
}
+
}
+
})
+
}
+
+
/// Handle a firehose message
+
async fn handle_firehose_message(
+
state: &ServerState,
+
msg: JetstreamMessage<'_>,
+
) -> miette::Result<()> {
+
match msg {
+
JetstreamMessage::Commit {
+
did,
+
commit,
+
..
+
} => {
+
// Check if this is our site
+
if did.as_str() == state.did.as_str()
+
&& commit.collection.as_str() == "place.wisp.fs"
+
&& commit.rkey.as_str() == state.rkey.as_str()
+
{
+
match commit.operation {
+
CommitOperation::Create | CommitOperation::Update => {
+
let new_cid = commit.cid.as_ref().map(|c| c.to_string());
+
+
// Check if CID changed
+
let should_update = {
+
let last_cid = state.last_cid.read().await;
+
new_cid != *last_cid
+
};
+
+
if should_update {
+
println!("\n[Update] Detected change to site {} (CID: {:?})", state.rkey, new_cid);
+
println!("[Update] Pulling latest version...");
+
+
// Pull the updated site
+
match pull_site(
+
state.did.clone(),
+
state.rkey.clone(),
+
state.output_dir.clone(),
+
)
+
.await
+
{
+
Ok(_) => {
+
// Update last CID
+
let mut last_cid = state.last_cid.write().await;
+
*last_cid = new_cid;
+
println!("[Update] ✓ Site updated successfully!\n");
+
}
+
Err(e) => {
+
eprintln!("[Update] Failed to pull site: {}", e);
+
}
+
}
+
}
+
}
+
CommitOperation::Delete => {
+
println!("\n[Update] Site {} was deleted", state.rkey);
+
}
+
}
+
}
+
}
+
_ => {
+
// Ignore identity and account messages
+
}
+
}
+
+
Ok(())
+
}
+
-3
.gitmodules
···
-
[submodule "cli/jacquard"]
-
path = cli/jacquard
-
url = https://tangled.org/@nonbinary.computer/jacquard
-1
cli/jacquard
···
-
Subproject commit d533482a61f540586b1eea620b8e9a01a59d5650
+1 -1
cli/Cargo.toml
···
[package]
name = "wisp-cli"
-
version = "0.1.0"
+
version = "0.2.0"
edition = "2024"
[features]
+28 -1
crates.nix
···
targets.x86_64-pc-windows-gnu.latest.rust-std
targets.x86_64-unknown-linux-gnu.latest.rust-std
targets.aarch64-apple-darwin.latest.rust-std
+
targets.aarch64-unknown-linux-gnu.latest.rust-std
];
# configure crates
nci.crates."wisp-cli" = {
···
dev.runTests = false;
release.runTests = false;
};
-
targets."x86_64-unknown-linux-gnu" = {
+
targets."x86_64-unknown-linux-gnu" = let
+
targetPkgs = pkgs.pkgsCross.gnu64;
+
targetCC = targetPkgs.stdenv.cc;
+
targetCargoEnvVarTarget = targetPkgs.stdenv.hostPlatform.rust.cargoEnvVarTarget;
+
in rec {
default = true;
+
depsDrvConfig.mkDerivation = {
+
nativeBuildInputs = [targetCC];
+
};
+
depsDrvConfig.env = rec {
+
TARGET_CC = "${targetCC.targetPrefix}cc";
+
"CARGO_TARGET_${targetCargoEnvVarTarget}_LINKER" = TARGET_CC;
+
};
+
drvConfig = depsDrvConfig;
};
targets."x86_64-pc-windows-gnu" = let
targetPkgs = pkgs.pkgsCross.mingwW64;
···
};
drvConfig = depsDrvConfig;
};
+
targets."aarch64-unknown-linux-gnu" = let
+
targetPkgs = pkgs.pkgsCross.aarch64-multiplatform;
+
targetCC = targetPkgs.stdenv.cc;
+
targetCargoEnvVarTarget = targetPkgs.stdenv.hostPlatform.rust.cargoEnvVarTarget;
+
in rec {
+
depsDrvConfig.mkDerivation = {
+
nativeBuildInputs = [targetCC];
+
};
+
depsDrvConfig.env = rec {
+
TARGET_CC = "${targetCC.targetPrefix}cc";
+
"CARGO_TARGET_${targetCargoEnvVarTarget}_LINKER" = TARGET_CC;
+
};
+
drvConfig = depsDrvConfig;
+
};
};
};
}
+17 -2
flake.nix
···
...
}: let
crateOutputs = config.nci.outputs."wisp-cli";
+
mkRenamedPackage = name: pkg: pkgs.runCommand name {} ''
+
mkdir -p $out/bin
+
cp ${pkg}/bin/wisp-cli $out/bin/${name}
+
'';
in {
devShells.default = crateOutputs.devShell;
packages.default = crateOutputs.packages.release;
-
packages.wisp-cli-windows = crateOutputs.allTargets."x86_64-pc-windows-gnu".packages.release;
-
packages.wisp-cli-darwin = crateOutputs.allTargets."aarch64-apple-darwin".packages.release;
+
packages.wisp-cli-x86_64-linux = mkRenamedPackage "wisp-cli-x86_64-linux" crateOutputs.packages.release;
+
packages.wisp-cli-aarch64-linux = mkRenamedPackage "wisp-cli-aarch64-linux" crateOutputs.allTargets."aarch64-unknown-linux-gnu".packages.release;
+
packages.wisp-cli-x86_64-windows = mkRenamedPackage "wisp-cli-x86_64-windows.exe" crateOutputs.allTargets."x86_64-pc-windows-gnu".packages.release;
+
packages.wisp-cli-aarch64-darwin = mkRenamedPackage "wisp-cli-aarch64-darwin" crateOutputs.allTargets."aarch64-apple-darwin".packages.release;
+
packages.all = pkgs.symlinkJoin {
+
name = "wisp-cli-all";
+
paths = [
+
config.packages.wisp-cli-x86_64-linux
+
config.packages.wisp-cli-aarch64-linux
+
config.packages.wisp-cli-x86_64-windows
+
config.packages.wisp-cli-aarch64-darwin
+
];
+
};
};
};
}