Monorepo for Wisp.place. A static site hosting service built on top of the AT Protocol.
at v1.0.0 4.3 kB view raw
/**
 * Safely rewrites absolute paths in HTML to be relative to a base path.
 * Only processes common HTML attributes and preserves external URLs,
 * data URIs, anchors and relative paths.
 */

/** Attributes whose values are treated as URLs and considered for rewriting. */
const REWRITABLE_ATTRIBUTES = [
  'src',
  'href',
  'action',
  'data',
  'poster',
  'srcset',
] as const;

/**
 * Matches a leading URI scheme (`http:`, `data:`, `mailto:`, `javascript:` ...).
 * Anchored at the start so that a colon appearing later in a path or query
 * string (e.g. `/login?next=https://example.com`) is not mistaken for a scheme.
 */
const URI_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:/i;

/** Matches a single whitespace character. */
const WHITESPACE_PATTERN = /\s/;

/**
 * Pre-compiled attribute matchers, built once at module load instead of on
 * every call. Whitespace around `=` is capped at 5 characters and the value
 * is matched with a negated character class, so matching stays linear.
 *
 * Note: `\b` also matches after `-` or `:`, so `data-src="..."` and
 * `xlink:href="..."` are picked up by the `src` / `href` matchers.
 */
const ATTRIBUTE_MATCHERS = REWRITABLE_ATTRIBUTES.map(attr => ({
  attr,
  doubleQuoted: new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}"([^"]*)"`, 'gi'),
  singleQuoted: new RegExp(`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}'([^']*)'`, 'gi'),
}));

/**
 * Check if a path should be rewritten.
 *
 * @param path - Raw attribute value.
 * @returns `false` for empty values, protocol-relative URLs, values with a
 *   URI scheme, anchors (`#...`, `/#...`) and explicit relative paths
 *   (`./`, `../`); `true` otherwise.
 */
function shouldRewritePath(path: string): boolean {
  // Don't rewrite empty paths
  if (!path) return false;

  // Don't rewrite protocol-relative URLs (//host/...)
  if (path.startsWith('//')) return false;

  // Don't rewrite anything carrying a scheme (http:, https:, data:, mailto:, ...).
  // Only a *leading* scheme counts; colons later in the value are ordinary characters.
  if (URI_SCHEME_PATTERN.test(path)) return false;

  // Don't rewrite pure anchors or paths that start with /#
  if (path.startsWith('#') || path.startsWith('/#')) return false;

  // Don't rewrite relative paths (./ or ../)
  if (path.startsWith('./') || path.startsWith('../')) return false;

  return true;
}

/**
 * Rewrite a single path.
 *
 * @param path - Raw attribute value.
 * @param basePath - Base path, expected to end with `/`.
 * @returns `basePath` joined with `path` when `path` is site-absolute
 *   (`/file.js` -> `/base/file.js`); otherwise `path` unchanged.
 */
function rewritePath(path: string, basePath: string): string {
  if (!shouldRewritePath(path)) {
    return path;
  }

  // Handle absolute paths: /file.js -> /base/file.js
  if (path.startsWith('/')) {
    return basePath + path.slice(1);
  }

  // Bare filenames ("file.js") are already relative; leave them alone.
  return path;
}

/**
 * Rewrite a srcset attribute value (can contain multiple URLs).
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w".
 *
 * Candidates are tokenized rather than split on every comma: a URL is a run
 * of non-whitespace characters, so commas inside a URL (data URIs,
 * `/w_100,h_100/a.jpg`) are kept intact. A comma ends a candidate only when
 * it trails the URL or appears in the descriptor outside parentheses.
 *
 * @param srcset - Raw srcset attribute value.
 * @param basePath - Base path, expected to end with `/`.
 * @returns The candidates with rewritten URLs, joined by `", "`.
 */
function rewriteSrcset(srcset: string, basePath: string): string {
  const candidates: string[] = [];
  const length = srcset.length;
  let pos = 0;

  while (pos < length) {
    // Skip whitespace and commas separating candidates.
    while (pos < length) {
      const ch = srcset.charAt(pos);
      if (ch !== ',' && !WHITESPACE_PATTERN.test(ch)) break;
      pos++;
    }
    if (pos >= length) break;

    // The URL is the maximal run of non-whitespace characters.
    const urlStart = pos;
    while (pos < length && !WHITESPACE_PATTERN.test(srcset.charAt(pos))) {
      pos++;
    }
    let url = srcset.slice(urlStart, pos);
    let descriptor = '';

    if (url.endsWith(',')) {
      // Trailing commas terminate the candidate; there is no descriptor.
      let end = url.length;
      while (end > 0 && url.charAt(end - 1) === ',') end--;
      url = url.slice(0, end);
    } else {
      // The descriptor runs up to the next comma outside parentheses.
      const descriptorStart = pos;
      let inParens = false;
      while (pos < length) {
        const ch = srcset.charAt(pos);
        if (ch === '(') {
          inParens = true;
        } else if (ch === ')') {
          inParens = false;
        } else if (ch === ',' && !inParens) {
          break;
        }
        pos++;
      }
      // Keep the leading whitespace so "url 1x" round-trips unchanged.
      descriptor = srcset.slice(descriptorStart, pos).trimEnd();
    }

    candidates.push(rewritePath(url, basePath) + descriptor);
  }

  return candidates.join(', ');
}

/**
 * Rewrite absolute paths in HTML content.
 * Uses simple regex matching for safety (no full HTML parsing), so
 * attribute-like text outside of tags is rewritten as well.
 *
 * Rewritten attributes are emitted as lower-case `attr="value"` /
 * `attr='value'` with no whitespace around `=`.
 *
 * @param html - HTML source text.
 * @param basePath - Path to prefix onto site-absolute URLs; a trailing `/`
 *   is added when missing.
 * @returns The HTML with site-absolute attribute values prefixed by `basePath`.
 */
export function rewriteHtmlPaths(html: string, basePath: string): string {
  // Ensure base path ends with /
  const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/';

  let rewritten = html;

  for (const { attr, doubleQuoted, singleQuoted } of ATTRIBUTE_MATCHERS) {
    // srcset holds a list of URLs; every other attribute holds a single one.
    const rewriteValue = attr === 'srcset' ? rewriteSrcset : rewritePath;

    // String.prototype.replace resets lastIndex on global regexes, so the
    // shared module-level matchers are safe to reuse across calls.
    rewritten = rewritten.replace(
      doubleQuoted,
      (_match: string, value: string) =>
        `${attr}="${rewriteValue(value, normalizedBase)}"`
    );

    rewritten = rewritten.replace(
      singleQuoted,
      (_match: string, value: string) =>
        `${attr}='${rewriteValue(value, normalizedBase)}'`
    );
  }

  return rewritten;
}

/**
 * Check if content is HTML based on content type or filename.
 *
 * @param filepath - File path or name; only the text after the last `.` is inspected.
 * @param contentType - Optional media type; compared case-insensitively.
 * @returns `true` when `contentType` contains `text/html`, or when the file
 *   extension is `html` or `htm`.
 */
export function isHtmlContent(
  filepath: string,
  contentType?: string
): boolean {
  // Media types are case-insensitive ("Text/HTML; charset=utf-8" is valid).
  if (contentType && contentType.toLowerCase().includes('text/html')) {
    return true;
  }

  const lowered = filepath.toLowerCase();
  const dotIndex = lowered.lastIndexOf('.');
  // No dot means no extension; a file literally named "html" is not HTML.
  if (dotIndex === -1) return false;

  const ext = lowered.slice(dotIndex + 1);
  return ext === 'html' || ext === 'htm';
}