forked from
nekomimi.pet/wisp.place-monorepo
Monorepo for Wisp.place. A static site hosting service built on top of the AT Protocol.
/**
 * Safely rewrites root-absolute paths in HTML so that they are prefixed with a base path.
 * Only processes common URL-bearing HTML attributes and leaves external URLs, data URIs, etc. untouched.
 */
5
/**
 * Attribute names whose values are scanned for paths to rewrite.
 * The list is iterated in this order by rewriteHtmlPaths.
 */
const REWRITABLE_ATTRIBUTES = [
  // Attributes holding a single URL
  'src', 'href', 'action', 'data', 'poster',
  // Attribute holding a comma-separated candidate list (handled via rewriteSrcset)
  'srcset',
] as const;
14
/**
 * Decide whether a URL-ish attribute value is a root-absolute path that
 * should receive the base-path prefix.
 *
 * Only values beginning with a single `/` qualify. Everything else is left
 * alone: empty strings, URLs with a scheme (`https:`, `data:`, `mailto:`, ...),
 * protocol-relative URLs (`//host/...`), fragment links (`#x`, `/#x`),
 * explicit relative paths (`./`, `../`) and bare relative names.
 *
 * A colon appearing later in a root-absolute path (for example inside a
 * query string such as `/go?to=http://example.com`) does not prevent
 * rewriting: a value that starts with `/` cannot carry a URL scheme.
 *
 * @param path - Raw attribute value.
 * @returns `true` when the value is a root-absolute path to be rewritten.
 */
function shouldRewritePath(path: string): boolean {
  // Anything not starting with "/" is empty, scheme-qualified, a fragment,
  // or already relative - none of which need the base prefix.
  if (!path.startsWith('/')) return false;

  // Protocol-relative URL pointing at another host.
  if (path.startsWith('//')) return false;

  // Root fragment link ("/#section").
  if (path.startsWith('/#')) return false;

  return true;
}
41
/**
 * Prefix a single root-absolute path with the base path.
 *
 * The leading `/` of `path` is dropped before concatenation, so `basePath`
 * is used exactly as given (e.g. `/file.js` with base `/base/` becomes
 * `/base/file.js`). Values rejected by shouldRewritePath, and values that do
 * not start with `/`, are returned unchanged.
 *
 * @param path - Raw attribute value.
 * @param basePath - Prefix to prepend.
 * @returns The rewritten path, or `path` itself when no rewrite applies.
 */
function rewritePath(path: string, basePath: string): string {
  const isRootAbsolute = shouldRewritePath(path) && path.startsWith('/');
  return isRootAbsolute ? basePath + path.slice(1) : path;
}
59
/**
 * Rewrite every URL inside a `srcset` attribute value.
 *
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w".
 *
 * Candidates are tokenised the way the HTML srcset parser does rather than
 * by splitting on every comma: a URL is a run of non-whitespace characters,
 * so commas inside a URL (e.g. `data:image/jpeg;base64,/9j/...` or
 * `/w_100,h_100/a.jpg`) stay part of that URL. A comma only separates
 * candidates when it trails a URL token or follows a descriptor.
 *
 * @param srcset - Raw `srcset` attribute value.
 * @param basePath - Prefix handed to rewritePath for each candidate URL.
 * @returns The candidate list with rewritten URLs, joined with ", ".
 */
function rewriteSrcset(srcset: string, basePath: string): string {
  // ASCII whitespace as used by the HTML srcset grammar.
  const isSpace = (ch: string): boolean => ' \t\n\f\r'.includes(ch) && ch !== '';

  const candidates: string[] = [];
  const end = srcset.length;
  let pos = 0;

  while (pos < end) {
    // Skip whitespace and separator commas preceding the next candidate.
    while (pos < end && (isSpace(srcset.charAt(pos)) || srcset.charAt(pos) === ',')) {
      pos++;
    }
    if (pos >= end) break;

    // The URL is everything up to the next whitespace character.
    const urlStart = pos;
    while (pos < end && !isSpace(srcset.charAt(pos))) {
      pos++;
    }
    let urlEnd = pos;
    let descriptor = '';

    if (srcset.charAt(urlEnd - 1) === ',') {
      // Trailing commas terminate a candidate that has no descriptor.
      while (urlEnd > urlStart && srcset.charAt(urlEnd - 1) === ',') {
        urlEnd--;
      }
    } else {
      // Otherwise the descriptor runs up to the next comma; its leading
      // whitespace is kept verbatim, trailing whitespace is dropped.
      const descriptorStart = pos;
      while (pos < end && srcset.charAt(pos) !== ',') {
        pos++;
      }
      descriptor = srcset.slice(descriptorStart, pos).trimEnd();
    }

    const url = srcset.slice(urlStart, urlEnd);
    candidates.push(rewritePath(url, basePath) + descriptor);
  }

  return candidates.join(', ');
}
82
/**
 * Rewrite root-absolute paths in HTML content so they live under `basePath`.
 *
 * Uses plain regex matching on quoted attribute values (no HTML parsing), so
 * any text that looks like `attr="..."` or `attr='...'` is processed, and
 * unquoted attribute values are not touched. For every name in
 * REWRITABLE_ATTRIBUTES both double- and single-quoted values are handled;
 * `srcset` values go through rewriteSrcset, all others through rewritePath.
 *
 * The attribute name, the whitespace around `=` and the quote style are
 * written back exactly as they appeared in the input; only the value changes.
 *
 * @param html - HTML source text.
 * @param basePath - Base path to prepend; a trailing `/` is added if missing.
 * @returns The HTML with matching attribute values rewritten.
 */
export function rewriteHtmlPaths(html: string, basePath: string): string {
  // Ensure the base ends with "/" because rewritePath drops the path's own
  // leading slash before concatenating.
  const normalizedBase = basePath.endsWith('/') ? basePath : `${basePath}/`;

  let rewritten = html;

  for (const attr of REWRITABLE_ATTRIBUTES) {
    const rewriteValue = attr === 'srcset' ? rewriteSrcset : rewritePath;

    // Double-quoted values are processed first, then single-quoted ones.
    for (const quote of ['"', "'"]) {
      // Whitespace around "=" is bounded ({0,5}) and the value is a negated
      // character class, so matching cannot backtrack catastrophically.
      // Group 1 captures `name =` verbatim so it can be re-emitted unchanged.
      const pattern = new RegExp(
        `(\\b${attr}[ \\t]{0,5}=[ \\t]{0,5})${quote}([^${quote}]*)${quote}`,
        'gi'
      );

      rewritten = rewritten.replace(
        pattern,
        (_match: string, prefix: string, value: string) =>
          `${prefix}${quote}${rewriteValue(value, normalizedBase)}${quote}`
      );
    }
  }

  return rewritten;
}
133
/**
 * Check whether content is HTML, based on its content type or file name.
 *
 * The content type wins when it mentions `text/html`; the comparison is
 * case-insensitive because media type names are case-insensitive. Otherwise
 * the path must end in a real `.html` or `.htm` extension (any letter case).
 * An extension-less file that is merely named `html` or `htm` does not count.
 *
 * @param filepath - File path or name to inspect.
 * @param contentType - Optional Content-Type value, parameters allowed
 *   (e.g. `text/html; charset=utf-8`).
 * @returns `true` when the content should be treated as HTML.
 */
export function isHtmlContent(
  filepath: string,
  contentType?: string
): boolean {
  if (contentType?.toLowerCase().includes('text/html')) {
    return true;
  }

  // Require the dot so that only an actual extension matches.
  return /\.html?$/i.test(filepath);
}