Monorepo for Wisp.place. A static site hosting service built on top of the AT Protocol.

update dockerfiles, solidify html path rewriting

+8
.dockerignore
···
*.log
.vscode
.idea
+
server
+
.prettierrc
+
testDeploy
+
.tangled
+
.crush
+
.claude
+
server
+
hosting-service
+10 -6
Dockerfile
···
COPY public ./public
# Build the application (if needed)
-
# RUN bun run build
+
RUN bun build \
+
--compile \
+
--minify \
+
--outfile server \
+
src/index.ts
+
+
FROM scratch AS runtime
+
WORKDIR /app
+
COPY --from=base /app/server /app/server
# Set environment variables (can be overridden at runtime)
ENV PORT=3000
···
# Expose the application port
EXPOSE 3000
-
# Health check
-
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-
CMD bun -e "fetch('http://localhost:3000/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"
-
# Start the application
-
CMD ["bun", "src/index.ts"]
+
CMD ["./server"]
+259 -219
hosting-service/src/lib/firehose.ts
···
-
import { existsSync, rmSync } from 'fs';
-
import { getPdsForDid, downloadAndCacheSite, extractBlobCid, fetchSiteRecord } from './utils';
-
import { upsertSite, tryAcquireLock, releaseLock } from './db';
-
import { safeFetch } from './safe-fetch';
-
import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs';
-
import { Firehose } from '@atproto/sync';
-
import { IdResolver } from '@atproto/identity';
+
import { existsSync, rmSync } from 'fs'
+
import {
+
getPdsForDid,
+
downloadAndCacheSite,
+
extractBlobCid,
+
fetchSiteRecord
+
} from './utils'
+
import { upsertSite, tryAcquireLock, releaseLock } from './db'
+
import { safeFetch } from './safe-fetch'
+
import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs'
+
import { Firehose } from '@atproto/sync'
+
import { IdResolver } from '@atproto/identity'
-
const CACHE_DIR = './cache/sites';
+
const CACHE_DIR = './cache/sites'
export class FirehoseWorker {
-
private firehose: Firehose | null = null;
-
private idResolver: IdResolver;
-
private isShuttingDown = false;
-
private lastEventTime = Date.now();
+
private firehose: Firehose | null = null
+
private idResolver: IdResolver
+
private isShuttingDown = false
+
private lastEventTime = Date.now()
-
constructor(
-
private logger?: (msg: string, data?: Record<string, unknown>) => void,
-
) {
-
this.idResolver = new IdResolver();
-
}
+
constructor(
+
private logger?: (msg: string, data?: Record<string, unknown>) => void
+
) {
+
this.idResolver = new IdResolver()
+
}
-
private log(msg: string, data?: Record<string, unknown>) {
-
const log = this.logger || console.log;
-
log(`[FirehoseWorker] ${msg}`, data || {});
-
}
+
private log(msg: string, data?: Record<string, unknown>) {
+
const log = this.logger || console.log
+
log(`[FirehoseWorker] ${msg}`, data || {})
+
}
-
start() {
-
this.log('Starting firehose worker');
-
this.connect();
-
}
+
start() {
+
this.log('Starting firehose worker')
+
this.connect()
+
}
-
stop() {
-
this.log('Stopping firehose worker');
-
this.isShuttingDown = true;
+
stop() {
+
this.log('Stopping firehose worker')
+
this.isShuttingDown = true
-
if (this.firehose) {
-
this.firehose.destroy();
-
this.firehose = null;
-
}
-
}
+
if (this.firehose) {
+
this.firehose.destroy()
+
this.firehose = null
+
}
+
}
-
private connect() {
-
if (this.isShuttingDown) return;
+
private connect() {
+
if (this.isShuttingDown) return
-
this.log('Connecting to AT Protocol firehose');
+
this.log('Connecting to AT Protocol firehose')
-
this.firehose = new Firehose({
-
idResolver: this.idResolver,
-
service: 'wss://bsky.network',
-
filterCollections: ['place.wisp.fs'],
-
handleEvent: async (evt: any) => {
-
this.lastEventTime = Date.now();
+
this.firehose = new Firehose({
+
idResolver: this.idResolver,
+
service: 'wss://bsky.network',
+
filterCollections: ['place.wisp.fs'],
+
handleEvent: async (evt: any) => {
+
this.lastEventTime = Date.now()
-
// Watch for write events
-
if (evt.event === 'create' || evt.event === 'update') {
-
const record = evt.record;
+
// Watch for write events
+
if (evt.event === 'create' || evt.event === 'update') {
+
const record = evt.record
-
// If the write is a valid place.wisp.fs record
-
if (
-
evt.collection === 'place.wisp.fs' &&
-
isRecord(record) &&
-
validateRecord(record).success
-
) {
-
this.log('Received place.wisp.fs event', {
-
did: evt.did,
-
event: evt.event,
-
rkey: evt.rkey,
-
});
+
// If the write is a valid place.wisp.fs record
+
if (
+
evt.collection === 'place.wisp.fs' &&
+
isRecord(record) &&
+
validateRecord(record).success
+
) {
+
this.log('Received place.wisp.fs event', {
+
did: evt.did,
+
event: evt.event,
+
rkey: evt.rkey
+
})
-
try {
-
await this.handleCreateOrUpdate(evt.did, evt.rkey, record, evt.cid?.toString());
-
} catch (err) {
-
this.log('Error handling event', {
-
did: evt.did,
-
event: evt.event,
-
rkey: evt.rkey,
-
error: err instanceof Error ? err.message : String(err),
-
});
-
}
-
}
-
} else if (evt.event === 'delete' && evt.collection === 'place.wisp.fs') {
-
this.log('Received delete event', {
-
did: evt.did,
-
rkey: evt.rkey,
-
});
+
try {
+
await this.handleCreateOrUpdate(
+
evt.did,
+
evt.rkey,
+
record,
+
evt.cid?.toString()
+
)
+
} catch (err) {
+
this.log('Error handling event', {
+
did: evt.did,
+
event: evt.event,
+
rkey: evt.rkey,
+
error:
+
err instanceof Error
+
? err.message
+
: String(err)
+
})
+
}
+
}
+
} else if (
+
evt.event === 'delete' &&
+
evt.collection === 'place.wisp.fs'
+
) {
+
this.log('Received delete event', {
+
did: evt.did,
+
rkey: evt.rkey
+
})
-
try {
-
await this.handleDelete(evt.did, evt.rkey);
-
} catch (err) {
-
this.log('Error handling delete', {
-
did: evt.did,
-
rkey: evt.rkey,
-
error: err instanceof Error ? err.message : String(err),
-
});
-
}
-
}
-
},
-
onError: (err: any) => {
-
this.log('Firehose error', {
-
error: err instanceof Error ? err.message : String(err),
-
stack: err instanceof Error ? err.stack : undefined,
-
fullError: err,
-
});
-
console.error('Full firehose error:', err);
-
},
-
});
+
try {
+
await this.handleDelete(evt.did, evt.rkey)
+
} catch (err) {
+
this.log('Error handling delete', {
+
did: evt.did,
+
rkey: evt.rkey,
+
error:
+
err instanceof Error ? err.message : String(err)
+
})
+
}
+
}
+
},
+
onError: (err: any) => {
+
this.log('Firehose error', {
+
error: err instanceof Error ? err.message : String(err),
+
stack: err instanceof Error ? err.stack : undefined,
+
fullError: err
+
})
+
console.error('Full firehose error:', err)
+
}
+
})
-
this.firehose.start();
-
this.log('Firehose started');
-
}
+
this.firehose.start()
+
this.log('Firehose started')
+
}
-
private async handleCreateOrUpdate(did: string, site: string, record: any, eventCid?: string) {
-
this.log('Processing create/update', { did, site });
+
private async handleCreateOrUpdate(
+
did: string,
+
site: string,
+
record: any,
+
eventCid?: string
+
) {
+
this.log('Processing create/update', { did, site })
-
// Record is already validated in handleEvent
-
const fsRecord = record;
+
// Record is already validated in handleEvent
+
const fsRecord = record
-
const pdsEndpoint = await getPdsForDid(did);
-
if (!pdsEndpoint) {
-
this.log('Could not resolve PDS for DID', { did });
-
return;
-
}
+
const pdsEndpoint = await getPdsForDid(did)
+
if (!pdsEndpoint) {
+
this.log('Could not resolve PDS for DID', { did })
+
return
+
}
-
this.log('Resolved PDS', { did, pdsEndpoint });
+
this.log('Resolved PDS', { did, pdsEndpoint })
-
// Verify record exists on PDS and fetch its CID
-
let verifiedCid: string;
-
try {
-
const result = await fetchSiteRecord(did, site);
+
// Verify record exists on PDS and fetch its CID
+
let verifiedCid: string
+
try {
+
const result = await fetchSiteRecord(did, site)
-
if (!result) {
-
this.log('Record not found on PDS, skipping cache', { did, site });
-
return;
-
}
+
if (!result) {
+
this.log('Record not found on PDS, skipping cache', {
+
did,
+
site
+
})
+
return
+
}
-
verifiedCid = result.cid;
+
verifiedCid = result.cid
-
// Verify event CID matches PDS CID (prevent cache poisoning)
-
if (eventCid && eventCid !== verifiedCid) {
-
this.log('CID mismatch detected - potential spoofed event', {
-
did,
-
site,
-
eventCid,
-
verifiedCid
-
});
-
return;
-
}
+
// Verify event CID matches PDS CID (prevent cache poisoning)
+
if (eventCid && eventCid !== verifiedCid) {
+
this.log('CID mismatch detected - potential spoofed event', {
+
did,
+
site,
+
eventCid,
+
verifiedCid
+
})
+
return
+
}
-
this.log('Record verified on PDS', { did, site, cid: verifiedCid });
-
} catch (err) {
-
this.log('Failed to verify record on PDS', {
-
did,
-
site,
-
error: err instanceof Error ? err.message : String(err),
-
});
-
return;
-
}
+
this.log('Record verified on PDS', { did, site, cid: verifiedCid })
+
} catch (err) {
+
this.log('Failed to verify record on PDS', {
+
did,
+
site,
+
error: err instanceof Error ? err.message : String(err)
+
})
+
return
+
}
-
// Cache the record with verified CID (uses atomic swap internally)
-
// All instances cache locally for edge serving
-
await downloadAndCacheSite(did, site, fsRecord, pdsEndpoint, verifiedCid);
+
// Cache the record with verified CID (uses atomic swap internally)
+
// All instances cache locally for edge serving
+
await downloadAndCacheSite(
+
did,
+
site,
+
fsRecord,
+
pdsEndpoint,
+
verifiedCid
+
)
-
// Acquire distributed lock only for database write to prevent duplicate writes
-
const lockKey = `db:upsert:${did}:${site}`;
-
const lockAcquired = await tryAcquireLock(lockKey);
+
// Acquire distributed lock only for database write to prevent duplicate writes
+
const lockKey = `db:upsert:${did}:${site}`
+
const lockAcquired = await tryAcquireLock(lockKey)
-
if (!lockAcquired) {
-
this.log('Another instance is writing to DB, skipping upsert', { did, site });
-
this.log('Successfully processed create/update (cached locally)', { did, site });
-
return;
-
}
+
if (!lockAcquired) {
+
this.log('Another instance is writing to DB, skipping upsert', {
+
did,
+
site
+
})
+
this.log('Successfully processed create/update (cached locally)', {
+
did,
+
site
+
})
+
return
+
}
-
try {
-
// Upsert site to database (only one instance does this)
-
await upsertSite(did, site, fsRecord.site);
-
this.log('Successfully processed create/update (cached + DB updated)', { did, site });
-
} finally {
-
// Always release lock, even if DB write fails
-
await releaseLock(lockKey);
-
}
-
}
+
try {
+
// Upsert site to database (only one instance does this)
+
await upsertSite(did, site, fsRecord.site)
+
this.log(
+
'Successfully processed create/update (cached + DB updated)',
+
{ did, site }
+
)
+
} finally {
+
// Always release lock, even if DB write fails
+
await releaseLock(lockKey)
+
}
+
}
-
private async handleDelete(did: string, site: string) {
-
this.log('Processing delete', { did, site });
+
private async handleDelete(did: string, site: string) {
+
this.log('Processing delete', { did, site })
-
// All instances should delete their local cache (no lock needed)
-
const pdsEndpoint = await getPdsForDid(did);
-
if (!pdsEndpoint) {
-
this.log('Could not resolve PDS for DID', { did });
-
return;
-
}
+
// All instances should delete their local cache (no lock needed)
+
const pdsEndpoint = await getPdsForDid(did)
+
if (!pdsEndpoint) {
+
this.log('Could not resolve PDS for DID', { did })
+
return
+
}
-
// Verify record is actually deleted from PDS
-
try {
-
const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}`;
-
const recordRes = await safeFetch(recordUrl);
+
// Verify record is actually deleted from PDS
+
try {
+
const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}`
+
const recordRes = await safeFetch(recordUrl)
-
if (recordRes.ok) {
-
this.log('Record still exists on PDS, not deleting cache', {
-
did,
-
site,
-
});
-
return;
-
}
+
if (recordRes.ok) {
+
this.log('Record still exists on PDS, not deleting cache', {
+
did,
+
site
+
})
+
return
+
}
-
this.log('Verified record is deleted from PDS', {
-
did,
-
site,
-
status: recordRes.status,
-
});
-
} catch (err) {
-
this.log('Error verifying deletion on PDS', {
-
did,
-
site,
-
error: err instanceof Error ? err.message : String(err),
-
});
-
}
+
this.log('Verified record is deleted from PDS', {
+
did,
+
site,
+
status: recordRes.status
+
})
+
} catch (err) {
+
this.log('Error verifying deletion on PDS', {
+
did,
+
site,
+
error: err instanceof Error ? err.message : String(err)
+
})
+
}
-
// Delete cache
-
this.deleteCache(did, site);
+
// Delete cache
+
this.deleteCache(did, site)
-
this.log('Successfully processed delete', { did, site });
-
}
+
this.log('Successfully processed delete', { did, site })
+
}
-
private deleteCache(did: string, site: string) {
-
const cacheDir = `${CACHE_DIR}/${did}/${site}`;
+
private deleteCache(did: string, site: string) {
+
const cacheDir = `${CACHE_DIR}/${did}/${site}`
-
if (!existsSync(cacheDir)) {
-
this.log('Cache directory does not exist, nothing to delete', {
-
did,
-
site,
-
});
-
return;
-
}
+
if (!existsSync(cacheDir)) {
+
this.log('Cache directory does not exist, nothing to delete', {
+
did,
+
site
+
})
+
return
+
}
-
try {
-
rmSync(cacheDir, { recursive: true, force: true });
-
this.log('Cache deleted', { did, site, path: cacheDir });
-
} catch (err) {
-
this.log('Failed to delete cache', {
-
did,
-
site,
-
path: cacheDir,
-
error: err instanceof Error ? err.message : String(err),
-
});
-
}
-
}
+
try {
+
rmSync(cacheDir, { recursive: true, force: true })
+
this.log('Cache deleted', { did, site, path: cacheDir })
+
} catch (err) {
+
this.log('Failed to delete cache', {
+
did,
+
site,
+
path: cacheDir,
+
error: err instanceof Error ? err.message : String(err)
+
})
+
}
+
}
-
getHealth() {
-
const isConnected = this.firehose !== null;
-
const timeSinceLastEvent = Date.now() - this.lastEventTime;
+
getHealth() {
+
const isConnected = this.firehose !== null
+
const timeSinceLastEvent = Date.now() - this.lastEventTime
-
return {
-
connected: isConnected,
-
lastEventTime: this.lastEventTime,
-
timeSinceLastEvent,
-
healthy: isConnected && timeSinceLastEvent < 300000, // 5 minutes
-
};
-
}
+
return {
+
connected: isConnected,
+
lastEventTime: this.lastEventTime,
+
timeSinceLastEvent,
+
healthy: isConnected && timeSinceLastEvent < 300000 // 5 minutes
+
}
+
}
}
+457
hosting-service/src/lib/html-rewriter.test.ts
···
+
import { describe, test, expect } from 'bun:test'
+
import { rewriteHtmlPaths, isHtmlContent } from './html-rewriter'
+
+
describe('rewriteHtmlPaths', () => {
+
const basePath = '/identifier/site/'
+
+
describe('absolute paths', () => {
+
test('rewrites absolute paths with leading slash', () => {
+
const html = '<img src="/image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('rewrites nested absolute paths', () => {
+
const html = '<link href="/css/style.css">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<link href="/identifier/site/css/style.css">')
+
})
+
})
+
+
describe('relative paths from root document', () => {
+
test('rewrites relative paths with ./ prefix', () => {
+
const html = '<img src="./image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('rewrites relative paths without prefix', () => {
+
const html = '<img src="image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('rewrites relative paths with ../ (should stay at root)', () => {
+
const html = '<img src="../image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
})
+
+
describe('relative paths from nested documents', () => {
+
test('rewrites relative path from nested document', () => {
+
const html = '<img src="./photo.jpg">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/index.html'
+
)
+
expect(result).toBe(
+
'<img src="/identifier/site/folder1/folder2/photo.jpg">'
+
)
+
})
+
+
test('rewrites plain filename from nested document', () => {
+
const html = '<script src="app.js"></script>'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/index.html'
+
)
+
expect(result).toBe(
+
'<script src="/identifier/site/folder1/folder2/app.js"></script>'
+
)
+
})
+
+
test('rewrites ../ to go up one level', () => {
+
const html = '<img src="../image.png">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/folder3/index.html'
+
)
+
expect(result).toBe(
+
'<img src="/identifier/site/folder1/folder2/image.png">'
+
)
+
})
+
+
test('rewrites multiple ../ to go up multiple levels', () => {
+
const html = '<link href="../../css/style.css">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/folder3/index.html'
+
)
+
expect(result).toBe(
+
'<link href="/identifier/site/folder1/css/style.css">'
+
)
+
})
+
+
test('rewrites ../ with additional path segments', () => {
+
const html = '<img src="../assets/logo.png">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'pages/about/index.html'
+
)
+
expect(result).toBe(
+
'<img src="/identifier/site/pages/assets/logo.png">'
+
)
+
})
+
+
test('handles complex nested relative paths', () => {
+
const html = '<script src="../../lib/vendor/jquery.js"></script>'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'pages/blog/post/index.html'
+
)
+
expect(result).toBe(
+
'<script src="/identifier/site/pages/lib/vendor/jquery.js"></script>'
+
)
+
})
+
+
test('handles ../ going past root (stays at root)', () => {
+
const html = '<img src="../../../image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'folder1/index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
})
+
+
describe('external URLs and special schemes', () => {
+
test('does not rewrite http URLs', () => {
+
const html = '<img src="http://example.com/image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="http://example.com/image.png">')
+
})
+
+
test('does not rewrite https URLs', () => {
+
const html = '<link href="https://cdn.example.com/style.css">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<link href="https://cdn.example.com/style.css">'
+
)
+
})
+
+
test('does not rewrite protocol-relative URLs', () => {
+
const html = '<script src="//cdn.example.com/script.js"></script>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<script src="//cdn.example.com/script.js"></script>'
+
)
+
})
+
+
test('does not rewrite data URIs', () => {
+
const html =
+
'<img src="">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img src="">'
+
)
+
})
+
+
test('does not rewrite mailto links', () => {
+
const html = '<a href="mailto:test@example.com">Email</a>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<a href="mailto:test@example.com">Email</a>')
+
})
+
+
test('does not rewrite tel links', () => {
+
const html = '<a href="tel:+1234567890">Call</a>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<a href="tel:+1234567890">Call</a>')
+
})
+
})
+
+
describe('different HTML attributes', () => {
+
test('rewrites src attribute', () => {
+
const html = '<img src="/image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('rewrites href attribute', () => {
+
const html = '<a href="/page.html">Link</a>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<a href="/identifier/site/page.html">Link</a>')
+
})
+
+
test('rewrites action attribute', () => {
+
const html = '<form action="/submit"></form>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<form action="/identifier/site/submit"></form>')
+
})
+
+
test('rewrites data attribute', () => {
+
const html = '<object data="/document.pdf"></object>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<object data="/identifier/site/document.pdf"></object>'
+
)
+
})
+
+
test('rewrites poster attribute', () => {
+
const html = '<video poster="/thumbnail.jpg"></video>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<video poster="/identifier/site/thumbnail.jpg"></video>'
+
)
+
})
+
+
test('rewrites srcset attribute with single URL', () => {
+
const html = '<img srcset="/image.png 1x">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img srcset="/identifier/site/image.png 1x">'
+
)
+
})
+
+
test('rewrites srcset attribute with multiple URLs', () => {
+
const html = '<img srcset="/image-1x.png 1x, /image-2x.png 2x">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img srcset="/identifier/site/image-1x.png 1x, /identifier/site/image-2x.png 2x">'
+
)
+
})
+
+
test('rewrites srcset with width descriptors', () => {
+
const html = '<img srcset="/small.jpg 320w, /large.jpg 1024w">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img srcset="/identifier/site/small.jpg 320w, /identifier/site/large.jpg 1024w">'
+
)
+
})
+
+
test('rewrites srcset with relative paths from nested document', () => {
+
const html = '<img srcset="../img1.png 1x, ../img2.png 2x">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/index.html'
+
)
+
expect(result).toBe(
+
'<img srcset="/identifier/site/folder1/img1.png 1x, /identifier/site/folder1/img2.png 2x">'
+
)
+
})
+
})
+
+
describe('quote handling', () => {
+
test('handles double quotes', () => {
+
const html = '<img src="/image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('handles single quotes', () => {
+
const html = "<img src='/image.png'>"
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe("<img src='/identifier/site/image.png'>")
+
})
+
+
test('handles mixed quotes in same document', () => {
+
const html = '<img src="/img1.png"><link href=\'/style.css\'>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img src="/identifier/site/img1.png"><link href=\'/identifier/site/style.css\'>'
+
)
+
})
+
})
+
+
describe('multiple rewrites in same document', () => {
+
test('rewrites multiple attributes in complex HTML', () => {
+
const html = `
+
<!DOCTYPE html>
+
<html>
+
<head>
+
<link href="/css/style.css" rel="stylesheet">
+
<script src="/js/app.js"></script>
+
</head>
+
<body>
+
<img src="/images/logo.png" alt="Logo">
+
<a href="/about.html">About</a>
+
<form action="/submit">
+
<button type="submit">Submit</button>
+
</form>
+
</body>
+
</html>
+
`
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toContain('href="/identifier/site/css/style.css"')
+
expect(result).toContain('src="/identifier/site/js/app.js"')
+
expect(result).toContain('src="/identifier/site/images/logo.png"')
+
expect(result).toContain('href="/identifier/site/about.html"')
+
expect(result).toContain('action="/identifier/site/submit"')
+
})
+
+
test('handles mix of relative and absolute paths', () => {
+
const html = `
+
<img src="/abs/image.png">
+
<img src="./rel/image.png">
+
<img src="../parent/image.png">
+
<img src="https://external.com/image.png">
+
`
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/page.html'
+
)
+
expect(result).toContain('src="/identifier/site/abs/image.png"')
+
expect(result).toContain(
+
'src="/identifier/site/folder1/folder2/rel/image.png"'
+
)
+
expect(result).toContain(
+
'src="/identifier/site/folder1/parent/image.png"'
+
)
+
expect(result).toContain('src="https://external.com/image.png"')
+
})
+
})
+
+
describe('edge cases', () => {
+
test('handles empty src attribute', () => {
+
const html = '<img src="">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="">')
+
})
+
+
test('handles basePath without trailing slash', () => {
+
const html = '<img src="/image.png">'
+
const result = rewriteHtmlPaths(html, '/identifier/site', 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('handles basePath with trailing slash', () => {
+
const html = '<img src="/image.png">'
+
const result = rewriteHtmlPaths(
+
html,
+
'/identifier/site/',
+
'index.html'
+
)
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('handles whitespace around equals sign', () => {
+
const html = '<img src = "/image.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png">')
+
})
+
+
test('preserves query strings in URLs', () => {
+
const html = '<img src="/image.png?v=123">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe('<img src="/identifier/site/image.png?v=123">')
+
})
+
+
test('preserves hash fragments in URLs', () => {
+
const html = '<a href="/page.html#section">Link</a>'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<a href="/identifier/site/page.html#section">Link</a>'
+
)
+
})
+
+
test('handles paths with special characters', () => {
+
const html = '<img src="/folder-name/file_name.png">'
+
const result = rewriteHtmlPaths(html, basePath, 'index.html')
+
expect(result).toBe(
+
'<img src="/identifier/site/folder-name/file_name.png">'
+
)
+
})
+
})
+
+
describe('real-world scenario', () => {
+
test('handles the example from the bug report', () => {
+
// HTML file at: /folder1/folder2/folder3/index.html
+
// Image at: /folder1/folder2/img.png
+
// Reference: src="../img.png"
+
const html = '<img src="../img.png">'
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'folder1/folder2/folder3/index.html'
+
)
+
expect(result).toBe(
+
'<img src="/identifier/site/folder1/folder2/img.png">'
+
)
+
})
+
+
test('handles deeply nested static site structure', () => {
+
// A typical static site with nested pages and shared assets
+
const html = `
+
<!DOCTYPE html>
+
<html>
+
<head>
+
<link href="../../css/style.css" rel="stylesheet">
+
<link href="../../css/theme.css" rel="stylesheet">
+
<script src="../../js/main.js"></script>
+
</head>
+
<body>
+
<img src="../../images/logo.png" alt="Logo">
+
<img src="./post-image.jpg" alt="Post">
+
<a href="../index.html">Back to Blog</a>
+
<a href="../../index.html">Home</a>
+
</body>
+
</html>
+
`
+
const result = rewriteHtmlPaths(
+
html,
+
basePath,
+
'blog/posts/my-post.html'
+
)
+
+
// Assets two levels up
+
expect(result).toContain('href="/identifier/site/css/style.css"')
+
expect(result).toContain('href="/identifier/site/css/theme.css"')
+
expect(result).toContain('src="/identifier/site/js/main.js"')
+
expect(result).toContain('src="/identifier/site/images/logo.png"')
+
+
// Same directory
+
expect(result).toContain(
+
'src="/identifier/site/blog/posts/post-image.jpg"'
+
)
+
+
// One level up
+
expect(result).toContain('href="/identifier/site/blog/index.html"')
+
+
// Two levels up
+
expect(result).toContain('href="/identifier/site/index.html"')
+
})
+
})
+
})
+
+
describe('isHtmlContent', () => {
+
test('identifies HTML by content type', () => {
+
expect(isHtmlContent('file.txt', 'text/html')).toBe(true)
+
expect(isHtmlContent('file.txt', 'text/html; charset=utf-8')).toBe(
+
true
+
)
+
})
+
+
test('identifies HTML by .html extension', () => {
+
expect(isHtmlContent('index.html')).toBe(true)
+
expect(isHtmlContent('page.html', undefined)).toBe(true)
+
expect(isHtmlContent('/path/to/file.html')).toBe(true)
+
})
+
+
test('identifies HTML by .htm extension', () => {
+
expect(isHtmlContent('index.htm')).toBe(true)
+
expect(isHtmlContent('page.htm', undefined)).toBe(true)
+
})
+
+
test('handles case-insensitive extensions', () => {
+
expect(isHtmlContent('INDEX.HTML')).toBe(true)
+
expect(isHtmlContent('page.HTM')).toBe(true)
+
expect(isHtmlContent('File.HtMl')).toBe(true)
+
})
+
+
test('returns false for non-HTML files', () => {
+
expect(isHtmlContent('script.js')).toBe(false)
+
expect(isHtmlContent('style.css')).toBe(false)
+
expect(isHtmlContent('image.png')).toBe(false)
+
expect(isHtmlContent('data.json')).toBe(false)
+
})
+
+
test('returns false for files with no extension', () => {
+
expect(isHtmlContent('README')).toBe(false)
+
expect(isHtmlContent('Makefile')).toBe(false)
+
})
+
})
+178 -99
hosting-service/src/lib/html-rewriter.ts
···
*/
const REWRITABLE_ATTRIBUTES = [
-
'src',
-
'href',
-
'action',
-
'data',
-
'poster',
-
'srcset',
-
] as const;
+
'src',
+
'href',
+
'action',
+
'data',
+
'poster',
+
'srcset'
+
] as const
/**
* Check if a path should be rewritten
*/
function shouldRewritePath(path: string): boolean {
-
// Don't rewrite empty paths
-
if (!path) return false;
+
// Don't rewrite empty paths
+
if (!path) return false
-
// Don't rewrite external URLs (http://, https://, //)
-
if (path.startsWith('http://') || path.startsWith('https://') || path.startsWith('//')) {
-
return false;
-
}
+
// Don't rewrite external URLs (http://, https://, //)
+
if (
+
path.startsWith('http://') ||
+
path.startsWith('https://') ||
+
path.startsWith('//')
+
) {
+
return false
+
}
-
// Don't rewrite data URIs or other schemes (except file paths)
-
if (path.includes(':') && !path.startsWith('./') && !path.startsWith('../')) {
-
return false;
-
}
+
// Don't rewrite data URIs or other schemes (except file paths)
+
if (
+
path.includes(':') &&
+
!path.startsWith('./') &&
+
!path.startsWith('../')
+
) {
+
return false
+
}
-
// Don't rewrite pure anchors or paths that start with /#
-
if (path.startsWith('#') || path.startsWith('/#')) return false;
+
// Rewrite absolute paths (/) and relative paths (./ or ../ or plain filenames)
+
return true
+
}
+
+
/**
+
* Normalize a path by resolving . and .. segments
+
*/
+
function normalizePath(path: string): string {
+
const parts = path.split('/')
+
const result: string[] = []
+
+
for (const part of parts) {
+
if (part === '.' || part === '') {
+
// Skip current directory and empty parts (but keep leading empty for absolute paths)
+
if (part === '' && result.length === 0) {
+
result.push(part)
+
}
+
continue
+
}
+
if (part === '..') {
+
// Go up one directory (but not past root)
+
if (result.length > 0 && result[result.length - 1] !== '..') {
+
result.pop()
+
}
+
continue
+
}
+
result.push(part)
+
}
-
// Don't rewrite relative paths (./ or ../)
-
if (path.startsWith('./') || path.startsWith('../')) return false;
+
return result.join('/')
+
}
-
// Rewrite absolute paths (/)
-
return true;
+
/**
+
* Get the directory path from a file path
+
* e.g., "folder1/folder2/file.html" -> "folder1/folder2/"
+
*/
+
function getDirectory(filepath: string): string {
+
const lastSlash = filepath.lastIndexOf('/')
+
if (lastSlash === -1) {
+
return ''
+
}
+
return filepath.substring(0, lastSlash + 1)
}
/**
* Rewrite a single path
*/
-
function rewritePath(path: string, basePath: string): string {
-
if (!shouldRewritePath(path)) {
-
return path;
-
}
+
function rewritePath(
+
path: string,
+
basePath: string,
+
documentPath: string
+
): string {
+
if (!shouldRewritePath(path)) {
+
return path
+
}
+
+
// Handle absolute paths: /file.js -> /base/file.js
+
if (path.startsWith('/')) {
+
return basePath + path.slice(1)
+
}
+
+
// Handle relative paths by resolving against document directory
+
const documentDir = getDirectory(documentPath)
+
let resolvedPath: string
-
// Handle absolute paths: /file.js -> /base/file.js
-
if (path.startsWith('/')) {
-
return basePath + path.slice(1);
-
}
+
if (path.startsWith('./')) {
+
// ./file.js relative to current directory
+
resolvedPath = documentDir + path.slice(2)
+
} else if (path.startsWith('../')) {
+
// ../file.js relative to parent directory
+
resolvedPath = documentDir + path
+
} else {
+
// file.js (no prefix) - treat as relative to current directory
+
resolvedPath = documentDir + path
+
}
-
// At this point, only plain filenames without ./ or ../ prefix should reach here
-
// But since we're filtering those in shouldRewritePath, this shouldn't happen
-
return path;
+
// Normalize the path to resolve .. and .
+
resolvedPath = normalizePath(resolvedPath)
+
+
return basePath + resolvedPath
}
/**
* Rewrite srcset attribute (can contain multiple URLs)
* Format: "url1 1x, url2 2x" or "url1 100w, url2 200w"
*/
-
function rewriteSrcset(srcset: string, basePath: string): string {
-
return srcset
-
.split(',')
-
.map(part => {
-
const trimmed = part.trim();
-
const spaceIndex = trimmed.indexOf(' ');
+
function rewriteSrcset(
+
srcset: string,
+
basePath: string,
+
documentPath: string
+
): string {
+
return srcset
+
.split(',')
+
.map((part) => {
+
const trimmed = part.trim()
+
const spaceIndex = trimmed.indexOf(' ')
-
if (spaceIndex === -1) {
-
// No descriptor, just URL
-
return rewritePath(trimmed, basePath);
-
}
+
if (spaceIndex === -1) {
+
// No descriptor, just URL
+
return rewritePath(trimmed, basePath, documentPath)
+
}
-
const url = trimmed.substring(0, spaceIndex);
-
const descriptor = trimmed.substring(spaceIndex);
-
return rewritePath(url, basePath) + descriptor;
-
})
-
.join(', ');
+
const url = trimmed.substring(0, spaceIndex)
+
const descriptor = trimmed.substring(spaceIndex)
+
return rewritePath(url, basePath, documentPath) + descriptor
+
})
+
.join(', ')
}
/**
-
* Rewrite absolute paths in HTML content
+
* Rewrite absolute and relative paths in HTML content
* Uses simple regex matching for safety (no full HTML parsing)
*/
-
export function rewriteHtmlPaths(html: string, basePath: string): string {
-
// Ensure base path ends with /
-
const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/';
+
export function rewriteHtmlPaths(
+
html: string,
+
basePath: string,
+
documentPath: string
+
): string {
+
// Ensure base path ends with /
+
const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/'
-
let rewritten = html;
+
let rewritten = html
-
// Rewrite each attribute type
-
// Use more specific patterns to prevent ReDoS attacks
-
for (const attr of REWRITABLE_ATTRIBUTES) {
-
if (attr === 'srcset') {
-
// Special handling for srcset - use possessive quantifiers via atomic grouping simulation
-
// Limit whitespace to reasonable amount (max 5 spaces) to prevent ReDoS
-
const srcsetRegex = new RegExp(
-
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`,
-
'gi'
-
);
-
rewritten = rewritten.replace(srcsetRegex, (match, value) => {
-
const rewrittenValue = rewriteSrcset(value, normalizedBase);
-
return `${attr}="${rewrittenValue}"`;
-
});
-
} else {
-
// Regular attributes with quoted values
-
// Limit whitespace to prevent catastrophic backtracking
-
const doubleQuoteRegex = new RegExp(
-
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`,
-
'gi'
-
);
-
const singleQuoteRegex = new RegExp(
-
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}'([^']*)'`,
-
'gi'
-
);
+
// Rewrite each attribute type
+
// Use more specific patterns to prevent ReDoS attacks
+
for (const attr of REWRITABLE_ATTRIBUTES) {
+
if (attr === 'srcset') {
+
// Special handling for srcset - use possessive quantifiers via atomic grouping simulation
+
// Limit whitespace to reasonable amount (max 5 spaces) to prevent ReDoS
+
const srcsetRegex = new RegExp(
+
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`,
+
'gi'
+
)
+
rewritten = rewritten.replace(srcsetRegex, (match, value) => {
+
const rewrittenValue = rewriteSrcset(
+
value,
+
normalizedBase,
+
documentPath
+
)
+
return `${attr}="${rewrittenValue}"`
+
})
+
} else {
+
// Regular attributes with quoted values
+
// Limit whitespace to prevent catastrophic backtracking
+
const doubleQuoteRegex = new RegExp(
+
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`,
+
'gi'
+
)
+
const singleQuoteRegex = new RegExp(
+
`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}'([^']*)'`,
+
'gi'
+
)
-
rewritten = rewritten.replace(doubleQuoteRegex, (match, value) => {
-
const rewrittenValue = rewritePath(value, normalizedBase);
-
return `${attr}="${rewrittenValue}"`;
-
});
+
rewritten = rewritten.replace(doubleQuoteRegex, (match, value) => {
+
const rewrittenValue = rewritePath(
+
value,
+
normalizedBase,
+
documentPath
+
)
+
return `${attr}="${rewrittenValue}"`
+
})
-
rewritten = rewritten.replace(singleQuoteRegex, (match, value) => {
-
const rewrittenValue = rewritePath(value, normalizedBase);
-
return `${attr}='${rewrittenValue}'`;
-
});
-
}
-
}
+
rewritten = rewritten.replace(singleQuoteRegex, (match, value) => {
+
const rewrittenValue = rewritePath(
+
value,
+
normalizedBase,
+
documentPath
+
)
+
return `${attr}='${rewrittenValue}'`
+
})
+
}
+
}
-
return rewritten;
+
return rewritten
}
/**
* Check if content is HTML based on content or filename
*/
-
export function isHtmlContent(
-
filepath: string,
-
contentType?: string
-
): boolean {
-
if (contentType && contentType.includes('text/html')) {
-
return true;
-
}
+
export function isHtmlContent(filepath: string, contentType?: string): boolean {
+
if (contentType && contentType.includes('text/html')) {
+
return true
+
}
-
const ext = filepath.toLowerCase().split('.').pop();
-
return ext === 'html' || ext === 'htm';
+
const ext = filepath.toLowerCase().split('.').pop()
+
return ext === 'html' || ext === 'htm'
}
+3 -2
hosting-service/src/server.ts
···
} else {
content = readFileSync(cachedFile, 'utf-8');
}
-
const rewritten = rewriteHtmlPaths(content, basePath);
+
const rewritten = rewriteHtmlPaths(content, basePath, requestPath);
// Recompress the HTML for efficient delivery
const { gzipSync } = await import('zlib');
···
} else {
content = readFileSync(indexFile, 'utf-8');
}
-
const rewritten = rewriteHtmlPaths(content, basePath);
+
const indexPath = `${requestPath}/index.html`;
+
const rewritten = rewriteHtmlPaths(content, basePath, indexPath);
// Recompress the HTML for efficient delivery
const { gzipSync } = await import('zlib');