Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

faster backfilling, fix wonkiness of redirects

Changed files
+86 -22
hosting-service
src
+4 -1
hosting-service/src/lib/backfill.ts
···
import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils';
import { logger } from './observability';
import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache';
+
import { clearRedirectRulesCache } from '../server';
export interface BackfillOptions {
skipExisting?: boolean; // Skip sites already in cache
···
export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> {
const {
skipExisting = true,
-
concurrency = 3,
+
concurrency = 10, // Increased from 3 to 10 for better parallelization
maxSites,
} = options;
···
try {
// Download and cache site
await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid);
+
// Clear redirect rules cache since the site was updated
+
clearRedirectRulesCache(site.did, site.rkey);
stats.cached++;
processed++;
logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
+4
hosting-service/src/lib/firehose.ts
···
import { Firehose } from '@atproto/sync'
import { IdResolver } from '@atproto/identity'
import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'
+
import { clearRedirectRulesCache } from '../server'
const CACHE_DIR = './cache/sites'
···
pdsEndpoint,
verifiedCid
)
+
+
// Clear redirect rules cache since the site was updated
+
clearRedirectRulesCache(did, site)
// Acquire distributed lock only for database write to prevent duplicate writes
// Note: upsertSite will check cache-only mode internally and skip if needed
+63 -17
hosting-service/src/lib/redirects.ts
···
status: number;
}
+
// Maximum number of redirect rules to prevent DoS attacks
+
const MAX_REDIRECT_RULES = 1000;
+
/**
* Parse a _redirects file into an array of redirect rules
*/
···
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
const lineRaw = lines[lineNum];
if (!lineRaw) continue;
-
+
const line = lineRaw.trim();
-
+
// Skip empty lines and comments
if (!line || line.startsWith('#')) {
continue;
+
}
+
+
// Enforce max rules limit
+
if (rules.length >= MAX_REDIRECT_RULES) {
+
console.warn(`Redirect rules limit reached (${MAX_REDIRECT_RULES}), ignoring remaining rules`);
+
break;
}
try {
···
}
/**
-
* Match a request path against redirect rules
+
* Match a request path against redirect rules with loop detection
*/
export function matchRedirectRule(
requestPath: string,
···
queryParams?: Record<string, string>;
headers?: Record<string, string>;
cookies?: Record<string, string>;
-
}
+
},
+
visitedPaths: Set<string> = new Set()
): RedirectMatch | null {
// Normalize path: ensure leading slash, remove trailing slash (except for root)
let normalizedPath = requestPath.startsWith('/') ? requestPath : `/${requestPath}`;
-
+
+
// Detect redirect loops
+
if (visitedPaths.has(normalizedPath)) {
+
console.warn(`Redirect loop detected for path: ${normalizedPath}`);
+
return null;
+
}
+
+
// Track this path to detect loops
+
visitedPaths.add(normalizedPath);
+
+
// Limit redirect chain depth to 10
+
if (visitedPaths.size > 10) {
+
console.warn(`Redirect chain too deep (>10) for path: ${normalizedPath}`);
+
return null;
+
}
+
for (const rule of rules) {
// Check query parameter conditions first (if any)
if (rule.queryParams) {
···
if (!context?.queryParams) {
continue;
}
-
-
const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => {
+
+
// Check that all required query params are present
+
// The value in rule.queryParams is either a literal or a placeholder (:name)
+
const queryMatches = Object.entries(rule.queryParams).every(([key, expectedValue]) => {
const actualValue = context.queryParams?.[key];
-
return actualValue !== undefined;
+
+
// Query param must exist
+
if (actualValue === undefined) {
+
return false;
+
}
+
+
// If expected value is a placeholder (:name), any value is acceptable
+
// If it's a literal, it must match exactly
+
if (expectedValue && !expectedValue.startsWith(':')) {
+
return actualValue === expectedValue;
+
}
+
+
return true;
});
-
+
if (!queryMatches) {
continue;
}
···
// Build the target path by replacing placeholders
let targetPath = rule.to;
-
-
// Replace captured parameters
+
+
// Replace captured parameters (with URL encoding)
if (rule.fromParams && match.length > 1) {
for (let i = 0; i < rule.fromParams.length; i++) {
const paramName = rule.fromParams[i];
const paramValue = match[i + 1];
-
+
if (!paramName || !paramValue) continue;
-
+
+
// URL encode captured values to prevent invalid URLs
+
const encodedValue = encodeURIComponent(paramValue);
+
if (paramName === 'splat') {
-
targetPath = targetPath.replace(':splat', paramValue);
+
// For splats, preserve slashes by re-decoding them
+
const splatValue = encodedValue.replace(/%2F/g, '/');
+
targetPath = targetPath.replace(':splat', splatValue);
} else {
-
targetPath = targetPath.replace(`:${paramName}`, paramValue);
+
targetPath = targetPath.replace(`:${paramName}`, encodedValue);
}
}
}
-
// Handle query parameter replacements
+
// Handle query parameter replacements (with URL encoding)
if (rule.queryParams && context?.queryParams) {
for (const [key, placeholder] of Object.entries(rule.queryParams)) {
const actualValue = context.queryParams[key];
if (actualValue && placeholder && placeholder.startsWith(':')) {
const paramName = placeholder.slice(1);
if (paramName) {
-
targetPath = targetPath.replace(`:${paramName}`, actualValue);
+
// URL encode query parameter values
+
const encodedValue = encodeURIComponent(actualValue);
+
targetPath = targetPath.replace(`:${paramName}`, encodedValue);
}
}
}
+6
src/lib/wisp-utils.test.ts
···
expect(shouldCompressFile('text/plain')).toBe(true)
})
+
test('should NOT compress _redirects file', () => {
+
expect(shouldCompressFile('text/plain', '_redirects')).toBe(false)
+
expect(shouldCompressFile('text/plain', 'folder/_redirects')).toBe(false)
+
expect(shouldCompressFile('application/octet-stream', '_redirects')).toBe(false)
+
})
+
test('should NOT compress images', () => {
expect(shouldCompressFile('image/png')).toBe(false)
expect(shouldCompressFile('image/jpeg')).toBe(false)
+7 -2
src/lib/wisp-utils.ts
···
}
/**
-
* Determine if a file should be gzip compressed based on its MIME type
+
* Determine if a file should be gzip compressed based on its MIME type and filename
*/
-
export function shouldCompressFile(mimeType: string): boolean {
+
export function shouldCompressFile(mimeType: string, fileName?: string): boolean {
+
// Never compress _redirects file - it needs to be plain text for the hosting service
+
if (fileName && (fileName.endsWith('/_redirects') || fileName === '_redirects')) {
+
return false;
+
}
+
// Compress text-based files and uncompressed audio formats
const compressibleTypes = [
'text/html',
+2 -2
src/routes/wisp.ts
···
const originalContent = Buffer.from(arrayBuffer);
const originalMimeType = file.type || 'application/octet-stream';
-
// Determine if file should be compressed
-
const shouldCompress = shouldCompressFile(originalMimeType);
+
// Determine if file should be compressed (pass filename to exclude _redirects)
+
const shouldCompress = shouldCompressFile(originalMimeType, normalizedPath);
// Text files (HTML/CSS/JS) need base64 encoding to prevent PDS content sniffing
// Audio files just need compression without base64