From 9a8033813d927f3db006be4ffd5be1216c781b0b Mon Sep 17 00:00:00 2001 From: "@nekomimi.pet" Date: Wed, 12 Nov 2025 17:28:10 -0500 Subject: [PATCH 1/2] init support for redirects file --- README.md | 32 +- hosting-service/EXAMPLE.md | 123 ------- hosting-service/example-_redirects | 134 +++++++ hosting-service/src/lib/redirects.test.ts | 215 +++++++++++ hosting-service/src/lib/redirects.ts | 413 ++++++++++++++++++++++ hosting-service/src/server.ts | 174 ++++++++- 6 files changed, 961 insertions(+), 130 deletions(-) delete mode 100644 hosting-service/EXAMPLE.md create mode 100644 hosting-service/example-_redirects create mode 100644 hosting-service/src/lib/redirects.test.ts create mode 100644 hosting-service/src/lib/redirects.ts diff --git a/README.md b/README.md index 0271aa3..211f12d 100644 --- a/README.md +++ b/README.md @@ -50,10 +50,40 @@ cd cli cargo build ``` +## Features + +### URL Redirects and Rewrites + +The hosting service supports Netlify-style `_redirects` files for managing URLs. Place a `_redirects` file in your site root to enable: + +- **301/302 Redirects**: Permanent and temporary URL redirects +- **200 Rewrites**: Serve different content without changing the URL +- **404 Custom Pages**: Custom error pages for specific paths +- **Splats & Placeholders**: Dynamic path matching (`/blog/:year/:month/:day`, `/news/*`) +- **Query Parameter Matching**: Redirect based on URL parameters +- **Conditional Redirects**: Route by country, language, or cookie presence +- **Force Redirects**: Override existing files with redirects + +Example `_redirects`: +``` +# Single-page app routing (React, Vue, etc.) +/* /index.html 200 + +# Simple redirects +/home / +/old-blog/* /blog/:splat + +# API proxy +/api/* https://api.example.com/:splat 200 + +# Country-based routing +/ /us/ 302 Country=us +/ /uk/ 302 Country=gb +``` + ## Limits - Max file size: 100MB (PDS limit) -- Max site size: 300MB - Max files: 2000 ## Tech Stack diff --git a/hosting-service/EXAMPLE.md b/hosting-service/EXAMPLE.md deleted file mode 100644 index adf7cd9..0000000 --- a/hosting-service/EXAMPLE.md +++ /dev/null @@ -1,123 +0,0 @@ -# HTML Path Rewriting Example - -This document demonstrates how HTML path rewriting works when serving sites via the `/s/:identifier/:site/*` route. - -## Problem - -When you create a static site with absolute paths like `/style.css` or `/images/logo.png`, these paths work fine when served from the root domain. However, when served from a subdirectory like `/s/alice.bsky.social/mysite/`, these absolute paths break because they resolve to the server root instead of the site root. - -## Solution - -The hosting service automatically rewrites absolute paths in HTML files to work correctly in the subdirectory context. - -## Example - -**Original HTML file (index.html):** -```html - - - - - My Site - - - - - -
- Logo - -
- -
-

Welcome

- Hero - -
- - -
-
- - - - -``` - -**When accessed via `/s/alice.bsky.social/mysite/`, the HTML is rewritten to:** -```html - - - - - My Site - - - - - -
- Logo - -
- -
-

Welcome

- Hero - -
- - -
-
- - - - -``` - -## What's Preserved - -Notice that: -- ✅ Absolute paths are rewritten: `/style.css` → `/s/alice.bsky.social/mysite/style.css` -- ✅ External URLs are preserved: `https://example.com` stays the same -- ✅ Anchors are preserved: `#top` stays the same -- ✅ The rewriting is safe and won't break your site - -## Supported Attributes - -The rewriter handles these HTML attributes: -- `src` - images, scripts, iframes, videos, audio -- `href` - links, stylesheets -- `action` - forms -- `data` - objects -- `poster` - video posters -- `srcset` - responsive images - -## Testing Your Site - -To test if your site works with path rewriting: - -1. Upload your site to your PDS as a `place.wisp.fs` record -2. Access it via: `https://hosting.wisp.place/s/YOUR_HANDLE/SITE_NAME/` -3. Check that all resources load correctly - -If you're using relative paths already (like `./style.css` or `../images/logo.png`), they'll work without any rewriting. diff --git a/hosting-service/example-_redirects b/hosting-service/example-_redirects new file mode 100644 index 0000000..901c201 --- /dev/null +++ b/hosting-service/example-_redirects @@ -0,0 +1,134 @@ +# Example _redirects file for Wisp hosting +# Place this file in the root directory of your site as "_redirects" +# Lines starting with # are comments + +# =================================== +# SIMPLE REDIRECTS +# =================================== + +# Redirect home page +# /home / + +# Redirect old URLs to new ones +# /old-blog /blog +# /about-us /about + +# =================================== +# SPLAT REDIRECTS (WILDCARDS) +# =================================== + +# Redirect entire directories +# /news/* /blog/:splat +# /old-site/* /new-site/:splat + +# =================================== +# PLACEHOLDER REDIRECTS +# =================================== + +# Restructure blog URLs +# /blog/:year/:month/:day/:slug /posts/:year-:month-:day/:slug + +# Capture multiple parameters +# /products/:category/:id /shop/:category/item/:id + +# =================================== +# STATUS CODES +# =================================== + +# Permanent redirect (301) - default if not specified +# /permanent-move /new-location 301 + +# Temporary redirect (302) +# /temp-redirect /temp-location 302 + +# Rewrite (200) - serves different content, URL stays the same +# /api/* /functions/:splat 200 + +# Custom 404 page +# /shop/* /shop-closed.html 404 + +# =================================== +# FORCE REDIRECTS +# =================================== + +# Force redirect even if file exists (note the ! after status code) +# /override-file /other-file.html 200! + +# =================================== +# CONDITIONAL REDIRECTS +# =================================== + +# Country-based redirects (ISO 3166-1 alpha-2 codes) +# / /us/ 302 Country=us +# / /uk/ 302 Country=gb +# / /anz/ 302 Country=au,nz + +# Language-based redirects +# /products /en/products 301 Language=en +# /products /de/products 301 Language=de +# /products /fr/products 301 Language=fr + +# Cookie-based redirects (checks if cookie exists) +# /* /legacy/:splat 200 Cookie=is_legacy + +# =================================== +# QUERY PARAMETERS +# =================================== + +# Match specific query parameters +# /store id=:id /blog/:id 301 + +# Multiple parameters +# /search q=:query category=:cat /find/:cat/:query 301 + +# =================================== +# DOMAIN-LEVEL REDIRECTS +# =================================== + +# Redirect to different domain (must include protocol) +# /external https://example.com/path + +# Redirect entire subdomain +# http://blog.example.com/* https://example.com/blog/:splat 301! +# https://blog.example.com/* https://example.com/blog/:splat 301! + +# =================================== +# COMMON PATTERNS +# =================================== + +# Remove .html extensions +# /page.html /page + +# Add trailing slash +# /about /about/ + +# Single-page app fallback (serve index.html for all paths) +# /* /index.html 200 + +# API proxy +# /api/* https://api.example.com/:splat 200 + +# =================================== +# CUSTOM ERROR PAGES +# =================================== + +# Language-specific 404 pages +# /en/* /en/404.html 404 +# /de/* /de/404.html 404 + +# Section-specific 404 pages +# /shop/* /shop/not-found.html 404 +# /blog/* /blog/404.html 404 + +# =================================== +# NOTES +# =================================== +# +# - Rules are processed in order (first match wins) +# - More specific rules should come before general ones +# - Splats (*) can only be used at the end of a path +# - Query parameters are automatically preserved for 200, 301, 302 +# - Trailing slashes are normalized (/ and no / are treated the same) +# - Default status code is 301 if not specified +# + diff --git a/hosting-service/src/lib/redirects.test.ts b/hosting-service/src/lib/redirects.test.ts new file mode 100644 index 0000000..f61d5a3 --- /dev/null +++ b/hosting-service/src/lib/redirects.test.ts @@ -0,0 +1,215 @@ +import { describe, it, expect } from 'bun:test' +import { parseRedirectsFile, matchRedirectRule } from './redirects'; + +describe('parseRedirectsFile', () => { + it('should parse simple redirects', () => { + const content = ` +# Comment line +/old-path /new-path +/home / 301 +`; + const rules = parseRedirectsFile(content); + expect(rules).toHaveLength(2); + expect(rules[0]).toMatchObject({ + from: '/old-path', + to: '/new-path', + status: 301, + force: false, + }); + expect(rules[1]).toMatchObject({ + from: '/home', + to: '/', + status: 301, + force: false, + }); + }); + + it('should parse redirects with different status codes', () => { + const content = ` +/temp-redirect /target 302 +/rewrite /content 200 +/not-found /404 404 +`; + const rules = parseRedirectsFile(content); + expect(rules).toHaveLength(3); + expect(rules[0]?.status).toBe(302); + expect(rules[1]?.status).toBe(200); + expect(rules[2]?.status).toBe(404); + }); + + it('should parse force redirects', () => { + const content = `/force-path /target 301!`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.force).toBe(true); + expect(rules[0]?.status).toBe(301); + }); + + it('should parse splat redirects', () => { + const content = `/news/* /blog/:splat`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.from).toBe('/news/*'); + expect(rules[0]?.to).toBe('/blog/:splat'); + }); + + it('should parse placeholder redirects', () => { + const content = `/blog/:year/:month/:day /posts/:year-:month-:day`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.from).toBe('/blog/:year/:month/:day'); + expect(rules[0]?.to).toBe('/posts/:year-:month-:day'); + }); + + it('should parse country-based redirects', () => { + const content = `/ /anz 302 Country=au,nz`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.conditions?.country).toEqual(['au', 'nz']); + }); + + it('should parse language-based redirects', () => { + const content = `/products /en/products 301 Language=en`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.conditions?.language).toEqual(['en']); + }); + + it('should parse cookie-based redirects', () => { + const content = `/* /legacy/:splat 200 Cookie=is_legacy,my_cookie`; + const rules = parseRedirectsFile(content); + expect(rules[0]?.conditions?.cookie).toEqual(['is_legacy', 'my_cookie']); + }); +}); + +describe('matchRedirectRule', () => { + it('should match exact paths', () => { + const rules = parseRedirectsFile('/old-path /new-path'); + const match = matchRedirectRule('/old-path', rules); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/new-path'); + expect(match?.status).toBe(301); + }); + + it('should match paths with trailing slash', () => { + const rules = parseRedirectsFile('/old-path /new-path'); + const match = matchRedirectRule('/old-path/', rules); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/new-path'); + }); + + it('should match splat patterns', () => { + const rules = parseRedirectsFile('/news/* /blog/:splat'); + const match = matchRedirectRule('/news/2024/01/15/my-post', rules); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/blog/2024/01/15/my-post'); + }); + + it('should match placeholder patterns', () => { + const rules = parseRedirectsFile('/blog/:year/:month/:day /posts/:year-:month-:day'); + const match = matchRedirectRule('/blog/2024/01/15', rules); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/posts/2024-01-15'); + }); + + it('should preserve query strings for 301/302 redirects', () => { + const rules = parseRedirectsFile('/old /new 301'); + const match = matchRedirectRule('/old', rules, { + queryParams: { foo: 'bar', baz: 'qux' }, + }); + expect(match?.targetPath).toContain('?'); + expect(match?.targetPath).toContain('foo=bar'); + expect(match?.targetPath).toContain('baz=qux'); + }); + + it('should match based on query parameters', () => { + const rules = parseRedirectsFile('/store id=:id /blog/:id 301'); + const match = matchRedirectRule('/store', rules, { + queryParams: { id: 'my-post' }, + }); + expect(match).toBeTruthy(); + expect(match?.targetPath).toContain('/blog/my-post'); + }); + + it('should not match when query params are missing', () => { + const rules = parseRedirectsFile('/store id=:id /blog/:id 301'); + const match = matchRedirectRule('/store', rules, { + queryParams: {}, + }); + expect(match).toBeNull(); + }); + + it('should match based on country header', () => { + const rules = parseRedirectsFile('/ /aus 302 Country=au'); + const match = matchRedirectRule('/', rules, { + headers: { 'cf-ipcountry': 'AU' }, + }); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/aus'); + }); + + it('should not match wrong country', () => { + const rules = parseRedirectsFile('/ /aus 302 Country=au'); + const match = matchRedirectRule('/', rules, { + headers: { 'cf-ipcountry': 'US' }, + }); + expect(match).toBeNull(); + }); + + it('should match based on language header', () => { + const rules = parseRedirectsFile('/products /en/products 301 Language=en'); + const match = matchRedirectRule('/products', rules, { + headers: { 'accept-language': 'en-US,en;q=0.9' }, + }); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/en/products'); + }); + + it('should match based on cookie presence', () => { + const rules = parseRedirectsFile('/* /legacy/:splat 200 Cookie=is_legacy'); + const match = matchRedirectRule('/some-path', rules, { + cookies: { is_legacy: 'true' }, + }); + expect(match).toBeTruthy(); + expect(match?.targetPath).toBe('/legacy/some-path'); + }); + + it('should return first matching rule', () => { + const content = ` +/path /first +/path /second +`; + const rules = parseRedirectsFile(content); + const match = matchRedirectRule('/path', rules); + expect(match?.targetPath).toBe('/first'); + }); + + it('should match more specific rules before general ones', () => { + const content = ` +/jobs/customer-ninja /careers/support +/jobs/* /careers/:splat +`; + const rules = parseRedirectsFile(content); + + const match1 = matchRedirectRule('/jobs/customer-ninja', rules); + expect(match1?.targetPath).toBe('/careers/support'); + + const match2 = matchRedirectRule('/jobs/developer', rules); + expect(match2?.targetPath).toBe('/careers/developer'); + }); + + it('should handle SPA routing pattern', () => { + const rules = parseRedirectsFile('/* /index.html 200'); + + // Should match any path + const match1 = matchRedirectRule('/about', rules); + expect(match1).toBeTruthy(); + expect(match1?.targetPath).toBe('/index.html'); + expect(match1?.status).toBe(200); + + const match2 = matchRedirectRule('/users/123/profile', rules); + expect(match2).toBeTruthy(); + expect(match2?.targetPath).toBe('/index.html'); + expect(match2?.status).toBe(200); + + const match3 = matchRedirectRule('/', rules); + expect(match3).toBeTruthy(); + expect(match3?.targetPath).toBe('/index.html'); + }); +}); + diff --git a/hosting-service/src/lib/redirects.ts b/hosting-service/src/lib/redirects.ts new file mode 100644 index 0000000..f3c5273 --- /dev/null +++ b/hosting-service/src/lib/redirects.ts @@ -0,0 +1,413 @@ +import { readFile } from 'fs/promises'; +import { existsSync } from 'fs'; + +export interface RedirectRule { + from: string; + to: string; + status: number; + force: boolean; + conditions?: { + country?: string[]; + language?: string[]; + role?: string[]; + cookie?: string[]; + }; + // For pattern matching + fromPattern?: RegExp; + fromParams?: string[]; // Named parameters from the pattern + queryParams?: Record; // Expected query parameters +} + +export interface RedirectMatch { + rule: RedirectRule; + targetPath: string; + status: number; +} + +/** + * Parse a _redirects file into an array of redirect rules + */ +export function parseRedirectsFile(content: string): RedirectRule[] { + const lines = content.split('\n'); + const rules: RedirectRule[] = []; + + for (let lineNum = 0; lineNum < lines.length; lineNum++) { + const lineRaw = lines[lineNum]; + if (!lineRaw) continue; + + const line = lineRaw.trim(); + + // Skip empty lines and comments + if (!line || line.startsWith('#')) { + continue; + } + + try { + const rule = parseRedirectLine(line); + if (rule && rule.fromPattern) { + rules.push(rule); + } + } catch (err) { + console.warn(`Failed to parse redirect rule on line ${lineNum + 1}: ${line}`, err); + } + } + + return rules; +} + +/** + * Parse a single redirect rule line + * Format: /from [query_params] /to [status] [conditions] + */ +function parseRedirectLine(line: string): RedirectRule | null { + // Split by whitespace, but respect quoted strings (though not commonly used) + const parts = line.split(/\s+/); + + if (parts.length < 2) { + return null; + } + + let idx = 0; + const from = parts[idx++]; + + if (!from) { + return null; + } + + let status = 301; // Default status + let force = false; + const conditions: NonNullable = {}; + const queryParams: Record = {}; + + // Parse query parameters that come before the destination path + // They look like: key=:value (and don't start with /) + while (idx < parts.length) { + const part = parts[idx]; + if (!part) { + idx++; + continue; + } + + // If it starts with / or http, it's the destination path + if (part.startsWith('/') || part.startsWith('http://') || part.startsWith('https://')) { + break; + } + + // If it contains = and comes before the destination, it's a query param + if (part.includes('=')) { + const splitIndex = part.indexOf('='); + const key = part.slice(0, splitIndex); + const value = part.slice(splitIndex + 1); + + if (key && value) { + queryParams[key] = value; + } + idx++; + } else { + // Not a query param, must be destination or something else + break; + } + } + + // Next part should be the destination + if (idx >= parts.length) { + return null; + } + + const to = parts[idx++]; + if (!to) { + return null; + } + + // Parse remaining parts for status code and conditions + for (let i = idx; i < parts.length; i++) { + const part = parts[i]; + + if (!part) continue; + + // Check for status code (with optional ! for force) + if (/^\d+!?$/.test(part)) { + if (part.endsWith('!')) { + force = true; + status = parseInt(part.slice(0, -1)); + } else { + status = parseInt(part); + } + continue; + } + + // Check for condition parameters (Country=, Language=, Role=, Cookie=) + if (part.includes('=')) { + const splitIndex = part.indexOf('='); + const key = part.slice(0, splitIndex); + const value = part.slice(splitIndex + 1); + + if (!key || !value) continue; + + const keyLower = key.toLowerCase(); + + if (keyLower === 'country') { + conditions.country = value.split(',').map(v => v.trim().toLowerCase()); + } else if (keyLower === 'language') { + conditions.language = value.split(',').map(v => v.trim().toLowerCase()); + } else if (keyLower === 'role') { + conditions.role = value.split(',').map(v => v.trim()); + } else if (keyLower === 'cookie') { + conditions.cookie = value.split(',').map(v => v.trim().toLowerCase()); + } + } + } + + // Parse the 'from' pattern + const { pattern, params } = convertPathToRegex(from); + + return { + from, + to, + status, + force, + conditions: Object.keys(conditions).length > 0 ? conditions : undefined, + queryParams: Object.keys(queryParams).length > 0 ? queryParams : undefined, + fromPattern: pattern, + fromParams: params, + }; +} + +/** + * Convert a path pattern with placeholders and splats to a regex + * Examples: + * /blog/:year/:month/:day -> captures year, month, day + * /news/* -> captures splat + */ +function convertPathToRegex(pattern: string): { pattern: RegExp; params: string[] } { + const params: string[] = []; + let regexStr = '^'; + + // Split by query string if present + const pathPart = pattern.split('?')[0] || pattern; + + // Escape special regex characters except * and : + let escaped = pathPart.replace(/[.+^${}()|[\]\\]/g, '\\$&'); + + // Replace :param with named capture groups + escaped = escaped.replace(/:([a-zA-Z_][a-zA-Z0-9_]*)/g, (match, paramName) => { + params.push(paramName); + // Match path segment (everything except / and ?) + return '([^/?]+)'; + }); + + // Replace * with splat capture (matches everything including /) + if (escaped.includes('*')) { + escaped = escaped.replace(/\*/g, '(.*)'); + params.push('splat'); + } + + regexStr += escaped; + + // Make trailing slash optional + if (!regexStr.endsWith('.*')) { + regexStr += '/?'; + } + + regexStr += '$'; + + return { + pattern: new RegExp(regexStr), + params, + }; +} + +/** + * Match a request path against redirect rules + */ +export function matchRedirectRule( + requestPath: string, + rules: RedirectRule[], + context?: { + queryParams?: Record; + headers?: Record; + cookies?: Record; + } +): RedirectMatch | null { + // Normalize path: ensure leading slash, remove trailing slash (except for root) + let normalizedPath = requestPath.startsWith('/') ? requestPath : `/${requestPath}`; + + for (const rule of rules) { + // Check query parameter conditions first (if any) + if (rule.queryParams) { + // If rule requires query params but none provided, skip this rule + if (!context?.queryParams) { + continue; + } + + const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => { + const actualValue = context.queryParams?.[key]; + return actualValue !== undefined; + }); + + if (!queryMatches) { + continue; + } + } + + // Check conditional redirects (country, language, role, cookie) + if (rule.conditions) { + if (rule.conditions.country && context?.headers) { + const cfCountry = context.headers['cf-ipcountry']; + const xCountry = context.headers['x-country']; + const country = (cfCountry?.toLowerCase() || xCountry?.toLowerCase()); + if (!country || !rule.conditions.country.includes(country)) { + continue; + } + } + + if (rule.conditions.language && context?.headers) { + const acceptLang = context.headers['accept-language']; + if (!acceptLang) { + continue; + } + // Parse accept-language header (simplified) + const langs = acceptLang.split(',').map(l => { + const langPart = l.split(';')[0]; + return langPart ? langPart.trim().toLowerCase() : ''; + }).filter(l => l !== ''); + const hasMatch = rule.conditions.language.some(lang => + langs.some(l => l === lang || l.startsWith(lang + '-')) + ); + if (!hasMatch) { + continue; + } + } + + if (rule.conditions.cookie && context?.cookies) { + const hasCookie = rule.conditions.cookie.some(cookieName => + context.cookies && cookieName in context.cookies + ); + if (!hasCookie) { + continue; + } + } + + // Role-based redirects would need JWT verification - skip for now + if (rule.conditions.role) { + continue; + } + } + + // Match the path pattern + const match = rule.fromPattern?.exec(normalizedPath); + if (!match) { + continue; + } + + // Build the target path by replacing placeholders + let targetPath = rule.to; + + // Replace captured parameters + if (rule.fromParams && match.length > 1) { + for (let i = 0; i < rule.fromParams.length; i++) { + const paramName = rule.fromParams[i]; + const paramValue = match[i + 1]; + + if (!paramName || !paramValue) continue; + + if (paramName === 'splat') { + targetPath = targetPath.replace(':splat', paramValue); + } else { + targetPath = targetPath.replace(`:${paramName}`, paramValue); + } + } + } + + // Handle query parameter replacements + if (rule.queryParams && context?.queryParams) { + for (const [key, placeholder] of Object.entries(rule.queryParams)) { + const actualValue = context.queryParams[key]; + if (actualValue && placeholder && placeholder.startsWith(':')) { + const paramName = placeholder.slice(1); + if (paramName) { + targetPath = targetPath.replace(`:${paramName}`, actualValue); + } + } + } + } + + // Preserve query string for 200, 301, 302 redirects (unless target already has one) + if ([200, 301, 302].includes(rule.status) && context?.queryParams && !targetPath.includes('?')) { + const queryString = Object.entries(context.queryParams) + .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`) + .join('&'); + if (queryString) { + targetPath += `?${queryString}`; + } + } + + return { + rule, + targetPath, + status: rule.status, + }; + } + + return null; +} + +/** + * Load redirect rules from a cached site + */ +export async function loadRedirectRules(did: string, rkey: string): Promise { + const CACHE_DIR = process.env.CACHE_DIR || './cache/sites'; + const redirectsPath = `${CACHE_DIR}/${did}/${rkey}/_redirects`; + + if (!existsSync(redirectsPath)) { + return []; + } + + try { + const content = await readFile(redirectsPath, 'utf-8'); + return parseRedirectsFile(content); + } catch (err) { + console.error('Failed to load _redirects file', err); + return []; + } +} + +/** + * Parse cookies from Cookie header + */ +export function parseCookies(cookieHeader?: string): Record { + if (!cookieHeader) return {}; + + const cookies: Record = {}; + const parts = cookieHeader.split(';'); + + for (const part of parts) { + const [key, ...valueParts] = part.split('='); + if (key && valueParts.length > 0) { + cookies[key.trim()] = valueParts.join('=').trim(); + } + } + + return cookies; +} + +/** + * Parse query string into object + */ +export function parseQueryString(url: string): Record { + const queryStart = url.indexOf('?'); + if (queryStart === -1) return {}; + + const queryString = url.slice(queryStart + 1); + const params: Record = {}; + + for (const pair of queryString.split('&')) { + const [key, value] = pair.split('='); + if (key) { + params[decodeURIComponent(key)] = value ? decodeURIComponent(value) : ''; + } + } + + return params; +} + diff --git a/hosting-service/src/server.ts b/hosting-service/src/server.ts index 45971c1..a76a0c8 100644 --- a/hosting-service/src/server.ts +++ b/hosting-service/src/server.ts @@ -7,6 +7,7 @@ import { readFile, access } from 'fs/promises'; import { lookup } from 'mime-types'; import { logger, observabilityMiddleware, observabilityErrorHandler, logCollector, errorTracker, metricsCollector } from './lib/observability'; import { fileCache, metadataCache, rewrittenHtmlCache, getCacheKey, type FileMetadata } from './lib/cache'; +import { loadRedirectRules, matchRedirectRule, parseCookies, parseQueryString, type RedirectRule } from './lib/redirects'; const BASE_HOST = process.env.BASE_HOST || 'wisp.place'; @@ -35,8 +36,85 @@ async function fileExists(path: string): Promise { } } +// Cache for redirect rules (per site) +const redirectRulesCache = new Map(); + +/** + * Clear redirect rules cache for a specific site + * Should be called when a site is updated/recached + */ +export function clearRedirectRulesCache(did: string, rkey: string) { + const cacheKey = `${did}:${rkey}`; + redirectRulesCache.delete(cacheKey); +} + // Helper to serve files from cache -async function serveFromCache(did: string, rkey: string, filePath: string) { +async function serveFromCache( + did: string, + rkey: string, + filePath: string, + fullUrl?: string, + headers?: Record +) { + // Check for redirect rules first + const redirectCacheKey = `${did}:${rkey}`; + let redirectRules = redirectRulesCache.get(redirectCacheKey); + + if (redirectRules === undefined) { + // Load rules for the first time + redirectRules = await loadRedirectRules(did, rkey); + redirectRulesCache.set(redirectCacheKey, redirectRules); + } + + // Apply redirect rules if any exist + if (redirectRules.length > 0) { + const requestPath = '/' + (filePath || ''); + const queryParams = fullUrl ? parseQueryString(fullUrl) : {}; + const cookies = parseCookies(headers?.['cookie']); + + const redirectMatch = matchRedirectRule(requestPath, redirectRules, { + queryParams, + headers, + cookies, + }); + + if (redirectMatch) { + const { targetPath, status } = redirectMatch; + + // Handle different status codes + if (status === 200) { + // Rewrite: serve different content but keep URL the same + // Remove leading slash for internal path resolution + const rewritePath = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; + return serveFileInternal(did, rkey, rewritePath); + } else if (status === 301 || status === 302) { + // External redirect: change the URL + return new Response(null, { + status, + headers: { + 'Location': targetPath, + 'Cache-Control': status === 301 ? 'public, max-age=31536000' : 'public, max-age=0', + }, + }); + } else if (status === 404) { + // Custom 404 page + const custom404Path = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; + const response = await serveFileInternal(did, rkey, custom404Path); + // Override status to 404 + return new Response(response.body, { + status: 404, + headers: response.headers, + }); + } + } + } + + // No redirect matched, serve normally + return serveFileInternal(did, rkey, filePath); +} + +// Internal function to serve a file (used by both normal serving and rewrites) +async function serveFileInternal(did: string, rkey: string, filePath: string) { // Default to index.html if path is empty or ends with / let requestPath = filePath || 'index.html'; if (requestPath.endsWith('/')) { @@ -138,8 +216,74 @@ async function serveFromCacheWithRewrite( did: string, rkey: string, filePath: string, - basePath: string + basePath: string, + fullUrl?: string, + headers?: Record ) { + // Check for redirect rules first + const redirectCacheKey = `${did}:${rkey}`; + let redirectRules = redirectRulesCache.get(redirectCacheKey); + + if (redirectRules === undefined) { + // Load rules for the first time + redirectRules = await loadRedirectRules(did, rkey); + redirectRulesCache.set(redirectCacheKey, redirectRules); + } + + // Apply redirect rules if any exist + if (redirectRules.length > 0) { + const requestPath = '/' + (filePath || ''); + const queryParams = fullUrl ? parseQueryString(fullUrl) : {}; + const cookies = parseCookies(headers?.['cookie']); + + const redirectMatch = matchRedirectRule(requestPath, redirectRules, { + queryParams, + headers, + cookies, + }); + + if (redirectMatch) { + const { targetPath, status } = redirectMatch; + + // Handle different status codes + if (status === 200) { + // Rewrite: serve different content but keep URL the same + const rewritePath = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; + return serveFileInternalWithRewrite(did, rkey, rewritePath, basePath); + } else if (status === 301 || status === 302) { + // External redirect: change the URL + // For sites.wisp.place, we need to adjust the target path to include the base path + // unless it's an absolute URL + let redirectTarget = targetPath; + if (!targetPath.startsWith('http://') && !targetPath.startsWith('https://')) { + redirectTarget = basePath + (targetPath.startsWith('/') ? targetPath.slice(1) : targetPath); + } + return new Response(null, { + status, + headers: { + 'Location': redirectTarget, + 'Cache-Control': status === 301 ? 'public, max-age=31536000' : 'public, max-age=0', + }, + }); + } else if (status === 404) { + // Custom 404 page + const custom404Path = targetPath.startsWith('/') ? targetPath.slice(1) : targetPath; + const response = await serveFileInternalWithRewrite(did, rkey, custom404Path, basePath); + // Override status to 404 + return new Response(response.body, { + status: 404, + headers: response.headers, + }); + } + } + } + + // No redirect matched, serve normally + return serveFileInternalWithRewrite(did, rkey, filePath, basePath); +} + +// Internal function to serve a file with rewriting +async function serveFileInternalWithRewrite(did: string, rkey: string, filePath: string, basePath: string) { // Default to index.html if path is empty or ends with / let requestPath = filePath || 'index.html'; if (requestPath.endsWith('/')) { @@ -317,6 +461,8 @@ async function ensureSiteCached(did: string, rkey: string): Promise { try { await downloadAndCacheSite(did, rkey, siteData.record, pdsEndpoint, siteData.cid); + // Clear redirect rules cache since the site was updated + clearRedirectRulesCache(did, rkey); logger.info('Site cached successfully', { did, rkey }); return true; } catch (err) { @@ -384,7 +530,11 @@ app.get('/*', async (c) => { // Serve with HTML path rewriting to handle absolute paths const basePath = `/${identifier}/${site}/`; - return serveFromCacheWithRewrite(did, site, filePath, basePath); + const headers: Record = {}; + c.req.raw.headers.forEach((value, key) => { + headers[key.toLowerCase()] = value; + }); + return serveFromCacheWithRewrite(did, site, filePath, basePath, c.req.url, headers); } // Check if this is a DNS hash subdomain @@ -420,7 +570,11 @@ app.get('/*', async (c) => { return c.text('Site not found', 404); } - return serveFromCache(customDomain.did, rkey, path); + const headers: Record = {}; + c.req.raw.headers.forEach((value, key) => { + headers[key.toLowerCase()] = value; + }); + return serveFromCache(customDomain.did, rkey, path, c.req.url, headers); } // Route 2: Registered subdomains - /*.wisp.place/* @@ -444,7 +598,11 @@ app.get('/*', async (c) => { return c.text('Site not found', 404); } - return serveFromCache(domainInfo.did, rkey, path); + const headers: Record = {}; + c.req.raw.headers.forEach((value, key) => { + headers[key.toLowerCase()] = value; + }); + return serveFromCache(domainInfo.did, rkey, path, c.req.url, headers); } // Route 1: Custom domains - /* @@ -467,7 +625,11 @@ app.get('/*', async (c) => { return c.text('Site not found', 404); } - return serveFromCache(customDomain.did, rkey, path); + const headers: Record = {}; + c.req.raw.headers.forEach((value, key) => { + headers[key.toLowerCase()] = value; + }); + return serveFromCache(customDomain.did, rkey, path, c.req.url, headers); }); // Internal observability endpoints (for admin panel) -- 2.50.1 (Apple Git-155) From f1f70b3b22ddf300959c8855fb721e139b9ec8a6 Mon Sep 17 00:00:00 2001 From: "@nekomimi.pet" Date: Wed, 12 Nov 2025 18:33:31 -0500 Subject: [PATCH 2/2] Add support for existing blob reuse in deployment process --- cli/.gitignore | 1 + cli/Cargo.lock | 3 + cli/Cargo.toml | 3 + cli/src/blob_map.rs | 92 +++++++++++++++++++++++++ cli/src/cid.rs | 66 ++++++++++++++++++ cli/src/main.rs | 159 +++++++++++++++++++++++++++++++++----------- 6 files changed, 286 insertions(+), 38 deletions(-) create mode 100644 cli/src/blob_map.rs create mode 100644 cli/src/cid.rs diff --git a/cli/.gitignore b/cli/.gitignore index fcd9e40..15fe010 100644 --- a/cli/.gitignore +++ b/cli/.gitignore @@ -1,3 +1,4 @@ +test/ .DS_STORE jacquard/ binaries/ diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 4b0ba8b..a100cf6 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -4385,10 +4385,13 @@ dependencies = [ "jacquard-oauth", "miette", "mime_guess", + "multibase", + "multihash", "reqwest", "rustversion", "serde", "serde_json", + "sha2", "shellexpand", "tokio", "walkdir", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 99493fb..6e0d1e2 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -30,3 +30,6 @@ walkdir = "2.5" mime_guess = "2.0" bytes = "1.10" futures = "0.3.31" +multihash = "0.19.3" +multibase = "0.9" +sha2 = "0.10" diff --git a/cli/src/blob_map.rs b/cli/src/blob_map.rs new file mode 100644 index 0000000..93c86bd --- /dev/null +++ b/cli/src/blob_map.rs @@ -0,0 +1,92 @@ +use jacquard_common::types::blob::BlobRef; +use jacquard_common::IntoStatic; +use std::collections::HashMap; + +use crate::place_wisp::fs::{Directory, EntryNode}; + +/// Extract blob information from a directory tree +/// Returns a map of file paths to their blob refs and CIDs +/// +/// This mirrors the TypeScript implementation in src/lib/wisp-utils.ts lines 275-302 +pub fn extract_blob_map( + directory: &Directory, +) -> HashMap, String)> { + extract_blob_map_recursive(directory, String::new()) +} + +fn extract_blob_map_recursive( + directory: &Directory, + current_path: String, +) -> HashMap, String)> { + let mut blob_map = HashMap::new(); + + for entry in &directory.entries { + let full_path = if current_path.is_empty() { + entry.name.to_string() + } else { + format!("{}/{}", current_path, entry.name) + }; + + match &entry.node { + EntryNode::File(file_node) => { + // Extract CID from blob ref + // BlobRef is an enum with Blob variant, which has a ref field (CidLink) + let blob_ref = &file_node.blob; + let cid_string = blob_ref.blob().r#ref.to_string(); + + // Store both normalized and full paths + // Normalize by removing base folder prefix (e.g., "cobblemon/index.html" -> "index.html") + let normalized_path = normalize_path(&full_path); + + blob_map.insert( + normalized_path.clone(), + (blob_ref.clone().into_static(), cid_string.clone()) + ); + + // Also store the full path for matching + if normalized_path != full_path { + blob_map.insert( + full_path, + (blob_ref.clone().into_static(), cid_string) + ); + } + } + EntryNode::Directory(subdir) => { + let sub_map = extract_blob_map_recursive(subdir, full_path); + blob_map.extend(sub_map); + } + EntryNode::Unknown(_) => { + // Skip unknown node types + } + } + } + + blob_map +} + +/// Normalize file path by removing base folder prefix +/// Example: "cobblemon/index.html" -> "index.html" +/// +/// Mirrors TypeScript implementation at src/routes/wisp.ts line 291 +pub fn normalize_path(path: &str) -> String { + // Remove base folder prefix (everything before first /) + if let Some(idx) = path.find('/') { + path[idx + 1..].to_string() + } else { + path.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_path() { + assert_eq!(normalize_path("index.html"), "index.html"); + assert_eq!(normalize_path("cobblemon/index.html"), "index.html"); + assert_eq!(normalize_path("folder/subfolder/file.txt"), "subfolder/file.txt"); + assert_eq!(normalize_path("a/b/c/d.txt"), "b/c/d.txt"); + } +} + diff --git a/cli/src/cid.rs b/cli/src/cid.rs new file mode 100644 index 0000000..5190d30 --- /dev/null +++ b/cli/src/cid.rs @@ -0,0 +1,66 @@ +use jacquard_common::types::cid::IpldCid; +use sha2::{Digest, Sha256}; + +/// Compute CID (Content Identifier) for blob content +/// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256 +/// +/// CRITICAL: This must be called on BASE64-ENCODED GZIPPED content, not just gzipped content +/// +/// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation +pub fn compute_cid(content: &[u8]) -> String { + // Use node crypto to compute sha256 hash (same as AT Protocol) + let hash = Sha256::digest(content); + + // Create multihash (code 0x12 = sha2-256) + let multihash = multihash::Multihash::wrap(0x12, &hash) + .expect("SHA-256 hash should always fit in multihash"); + + // Create CIDv1 with raw codec (0x55) + let cid = IpldCid::new_v1(0x55, multihash); + + // Convert to base32 string representation + cid.to_string_of_base(multibase::Base::Base32Lower) + .unwrap_or_else(|_| cid.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use base64::Engine; + + #[test] + fn test_compute_cid() { + // Test with a simple string: "hello" + let content = b"hello"; + let cid = compute_cid(content); + + // CID should start with 'baf' for raw codec base32 + assert!(cid.starts_with("baf")); + } + + #[test] + fn test_compute_cid_base64_encoded() { + // Simulate the actual use case: gzipped then base64 encoded + use flate2::write::GzEncoder; + use flate2::Compression; + use std::io::Write; + + let original = b"hello world"; + + // Gzip compress + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(original).unwrap(); + let gzipped = encoder.finish().unwrap(); + + // Base64 encode the gzipped data + let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); + + // Compute CID on the base64 bytes + let cid = compute_cid(&base64_bytes); + + // Should be a valid CID + assert!(cid.starts_with("baf")); + assert!(cid.len() > 10); + } +} + diff --git a/cli/src/main.rs b/cli/src/main.rs index cfeb908..db0e7cf 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,9 +1,11 @@ mod builder_types; mod place_wisp; +mod cid; +mod blob_map; use clap::Parser; use jacquard::CowStr; -use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession}; +use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession}; use jacquard::oauth::client::OAuthClient; use jacquard::oauth::loopback::LoopbackConfig; use jacquard::prelude::IdentityResolver; @@ -11,6 +13,7 @@ use jacquard_common::types::string::{Datetime, Rkey, RecordKey}; use jacquard_common::types::blob::MimeType; use miette::IntoDiagnostic; use std::path::{Path, PathBuf}; +use std::collections::HashMap; use flate2::Compression; use flate2::write::GzEncoder; use std::io::Write; @@ -107,17 +110,56 @@ async fn deploy_site( println!("Deploying site '{}'...", site_name); - // Build directory tree - let root_dir = build_directory(agent, &path).await?; + // Try to fetch existing manifest for incremental updates + let existing_blob_map: HashMap, String)> = { + use jacquard_common::types::string::AtUri; + + // Get the DID for this session + let session_info = agent.session_info().await; + if let Some((did, _)) = session_info { + // Construct the AT URI for the record + let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); + if let Ok(uri) = AtUri::new(&uri_string) { + match agent.get_record::(&uri).await { + Ok(response) => { + match response.into_output() { + Ok(record_output) => { + let existing_manifest = record_output.value; + let blob_map = blob_map::extract_blob_map(&existing_manifest.root); + println!("Found existing manifest with {} files, checking for changes...", blob_map.len()); + blob_map + } + Err(_) => { + println!("No existing manifest found, uploading all files..."); + HashMap::new() + } + } + } + Err(_) => { + // Record doesn't exist yet - this is a new site + println!("No existing manifest found, uploading all files..."); + HashMap::new() + } + } + } else { + println!("No existing manifest found (invalid URI), uploading all files..."); + HashMap::new() + } + } else { + println!("No existing manifest found (could not get DID), uploading all files..."); + HashMap::new() + } + }; - // Count total files - let file_count = count_files(&root_dir); + // Build directory tree + let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map).await?; + let uploaded_count = total_files - reused_count; // Create the Fs record let fs_record = Fs::new() .site(CowStr::from(site_name.clone())) .root(root_dir) - .file_count(file_count as i64) + .file_count(total_files as i64) .created_at(Datetime::now()) .build(); @@ -132,8 +174,9 @@ async fn deploy_site( .and_then(|s| s.split('/').next()) .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; - println!("Deployed site '{}': {}", site_name, output.uri); - println!("Available at: https://sites.wisp.place/{}/{}", did, site_name); + println!("\n✓ Deployed site '{}': {}", site_name, output.uri); + println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); + println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); Ok(()) } @@ -142,7 +185,8 @@ async fn deploy_site( fn build_directory<'a>( agent: &'a Agent, dir_path: &'a Path, -) -> std::pin::Pin>> + 'a>> + existing_blobs: &'a HashMap, String)>, +) -> std::pin::Pin, usize, usize)>> + 'a>> { Box::pin(async move { // Collect all directory entries first @@ -177,46 +221,66 @@ fn build_directory<'a>( } // Process files concurrently with a limit of 5 - let file_entries: Vec = stream::iter(file_tasks) + let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks) .map(|(name, path)| async move { - let file_node = process_file(agent, &path).await?; - Ok::<_, miette::Report>(Entry::new() + let (file_node, reused) = process_file(agent, &path, &name, existing_blobs).await?; + let entry = Entry::new() .name(CowStr::from(name)) .node(EntryNode::File(Box::new(file_node))) - .build()) + .build(); + Ok::<_, miette::Report>((entry, reused)) }) .buffer_unordered(5) .collect::>() .await .into_iter() .collect::>>()?; + + let mut file_entries = Vec::new(); + let mut reused_count = 0; + let mut total_files = 0; + + for (entry, reused) in file_results { + file_entries.push(entry); + total_files += 1; + if reused { + reused_count += 1; + } + } // Process directories recursively (sequentially to avoid too much nesting) let mut dir_entries = Vec::new(); for (name, path) in dir_tasks { - let subdir = build_directory(agent, &path).await?; + let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs).await?; dir_entries.push(Entry::new() .name(CowStr::from(name)) .node(EntryNode::Directory(Box::new(subdir))) .build()); + total_files += sub_total; + reused_count += sub_reused; } // Combine file and directory entries let mut entries = file_entries; entries.extend(dir_entries); - Ok(Directory::new() + let directory = Directory::new() .r#type(CowStr::from("directory")) .entries(entries) - .build()) + .build(); + + Ok((directory, total_files, reused_count)) }) } -/// Process a single file: gzip -> base64 -> upload blob +/// Process a single file: gzip -> base64 -> upload blob (or reuse existing) +/// Returns (File, reused: bool) async fn process_file( agent: &Agent, file_path: &Path, -) -> miette::Result> + file_name: &str, + existing_blobs: &HashMap, String)>, +) -> miette::Result<(File<'static>, bool)> { // Read file let file_data = std::fs::read(file_path).into_diagnostic()?; @@ -234,30 +298,49 @@ async fn process_file( // Base64 encode the gzipped data let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); - // Upload blob as octet-stream + // Compute CID for this file (CRITICAL: on base64-encoded gzipped content) + let file_cid = cid::compute_cid(&base64_bytes); + + // Normalize the file path for comparison + let normalized_path = blob_map::normalize_path(file_name); + + // Check if we have an existing blob with the same CID + let existing_blob = existing_blobs.get(&normalized_path) + .or_else(|| existing_blobs.get(file_name)); + + if let Some((existing_blob_ref, existing_cid)) = existing_blob { + if existing_cid == &file_cid { + // CIDs match - reuse existing blob + println!(" ✓ Reusing blob for {} (CID: {})", file_name, file_cid); + return Ok(( + File::new() + .r#type(CowStr::from("file")) + .blob(existing_blob_ref.clone()) + .encoding(CowStr::from("gzip")) + .mime_type(CowStr::from(original_mime)) + .base64(true) + .build(), + true + )); + } + } + + // File is new or changed - upload it + println!(" ↑ Uploading {} ({} bytes, CID: {})", file_name, base64_bytes.len(), file_cid); let blob = agent.upload_blob( base64_bytes, MimeType::new_static("application/octet-stream"), ).await?; - Ok(File::new() - .r#type(CowStr::from("file")) - .blob(blob) - .encoding(CowStr::from("gzip")) - .mime_type(CowStr::from(original_mime)) - .base64(true) - .build()) + Ok(( + File::new() + .r#type(CowStr::from("file")) + .blob(blob) + .encoding(CowStr::from("gzip")) + .mime_type(CowStr::from(original_mime)) + .base64(true) + .build(), + false + )) } -/// Count total files in a directory tree -fn count_files(dir: &Directory) -> usize { - let mut count = 0; - for entry in &dir.entries { - match &entry.node { - EntryNode::File(_) => count += 1, - EntryNode::Directory(subdir) => count += count_files(subdir), - _ => {} // Unknown variants - } - } - count -} -- 2.50.1 (Apple Git-155)