Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol. wisp.place
at main 5.5 kB view raw
/**
 * Safely rewrites absolute and relative paths in HTML so that they resolve
 * underneath a base path.
 *
 * Only a fixed set of URL-carrying attributes is processed. External URLs,
 * data URIs, other schemes and same-document references (`#frag`, `?query`)
 * are left untouched.
 */

const REWRITABLE_ATTRIBUTES = [
	'src',
	'href',
	'action',
	'data',
	'poster',
	'srcset'
] as const

/** Quote characters an attribute value may be wrapped in. */
const ATTRIBUTE_QUOTES = ['"', "'"] as const

/**
 * Decide whether an attribute value is a site-local path that must be rewritten.
 *
 * @param path - Raw attribute value.
 * @returns `true` for absolute (`/x`) and relative (`x`, `./x`, `../x`) paths,
 *   `false` for everything that must be passed through unchanged.
 */
function shouldRewritePath(path: string): boolean {
	// Nothing to rewrite
	if (!path) return false

	// Fragment-only and query-only references target the *current* document;
	// prefixing them with a directory would navigate away from the page.
	if (path.startsWith('#') || path.startsWith('?')) return false

	// External URLs (http://, https://, protocol-relative //)
	if (
		path.startsWith('http://') ||
		path.startsWith('https://') ||
		path.startsWith('//')
	) {
		return false
	}

	// data:, mailto:, javascript: and other schemes. Explicitly relative paths
	// are exempt because a colon inside them cannot be a scheme separator.
	if (
		path.includes(':') &&
		!path.startsWith('./') &&
		!path.startsWith('../')
	) {
		return false
	}

	return true
}

/**
 * Normalize a path by resolving `.` and `..` segments and collapsing empty
 * segments.
 *
 * - `..` never climbs above the root; surplus `..` segments are dropped.
 * - A leading `/` is preserved.
 * - A trailing `/` is preserved, and a path ending in `.` or `..` is treated
 *   as a directory (gets a trailing `/`), so `blog/` stays `blog/`.
 *
 * @param path - Path without query string or fragment.
 * @returns The normalized path.
 */
function normalizePath(path: string): string {
	const isAbsolute = path.startsWith('/')
	const segments = path.split('/')
	const resolved: string[] = []

	for (const segment of segments) {
		if (segment === '' || segment === '.') continue
		if (segment === '..') {
			// pop() on an empty array is a no-op, which clamps at the root
			resolved.pop()
			continue
		}
		resolved.push(segment)
	}

	const lastSegment = segments[segments.length - 1]
	const refersToDirectory =
		lastSegment === '' || lastSegment === '.' || lastSegment === '..'

	let normalized = resolved.join('/')
	if (refersToDirectory && resolved.length > 0) {
		normalized += '/'
	}

	return (isAbsolute ? '/' : '') + normalized
}

/**
 * Get the directory part of a file path, including the trailing slash.
 * e.g. "folder1/folder2/file.html" -> "folder1/folder2/"; "file.html" -> "".
 */
function getDirectory(filepath: string): string {
	const lastSlash = filepath.lastIndexOf('/')
	if (lastSlash === -1) {
		return ''
	}
	return filepath.substring(0, lastSlash + 1)
}

/**
 * Split a URL reference into its path and its `?query#fragment` suffix.
 *
 * @returns A `[path, suffix]` pair; `suffix` is empty when there is none and
 *   otherwise starts with `?` or `#`.
 */
function splitUrlSuffix(path: string): [string, string] {
	const suffixStart = path.search(/[?#]/)
	if (suffixStart === -1) {
		return [path, '']
	}
	return [path.slice(0, suffixStart), path.slice(suffixStart)]
}

/**
 * Rewrite a single path so that it lives under `basePath`.
 *
 * @param path - Raw attribute value.
 * @param basePath - Base path; expected to end with `/`.
 * @param documentPath - Path of the HTML document relative to the site root,
 *   used to resolve relative references.
 * @returns The rewritten path, or `path` unchanged when it must not be rewritten.
 */
function rewritePath(
	path: string,
	basePath: string,
	documentPath: string
): string {
	if (!shouldRewritePath(path)) {
		return path
	}

	// Absolute paths: /file.js -> /base/file.js
	if (path.startsWith('/')) {
		return basePath + path.slice(1)
	}

	// Relative paths (./x, ../x, x) resolve against the document's directory.
	// The query string / fragment is set aside so that dot-segment
	// normalization cannot mangle it.
	const [pathname, suffix] = splitUrlSuffix(path)
	const resolvedPath = normalizePath(getDirectory(documentPath) + pathname)

	// basePath already ends with '/', so drop any leading slash inherited from
	// a documentPath such as "/docs/index.html" to avoid "/base//docs/...".
	return basePath + resolvedPath.replace(/^\/+/, '') + suffix
}

/**
 * Rewrite a srcset attribute value, which may contain several candidates.
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w".
 *
 * Candidates are split on commas and re-joined with ", ". Descriptors are
 * kept as written.
 */
function rewriteSrcset(
	srcset: string,
	basePath: string,
	documentPath: string
): string {
	return srcset
		.split(',')
		.map((candidate) => {
			const trimmed = candidate.trim()
			// Any whitespace (space, tab, newline) may separate URL and descriptor
			const separatorIndex = trimmed.search(/\s/)

			if (separatorIndex === -1) {
				// No descriptor, just a URL
				return rewritePath(trimmed, basePath, documentPath)
			}

			const url = trimmed.substring(0, separatorIndex)
			const descriptor = trimmed.substring(separatorIndex)
			return rewritePath(url, basePath, documentPath) + descriptor
		})
		.join(', ')
}

/**
 * Rewrite absolute and relative paths in HTML content.
 *
 * Uses simple regex matching rather than a full HTML parser, so every
 * `attr="value"` / `attr='value'` occurrence in the text is considered,
 * wherever it appears. Matched attributes are re-emitted as `attr=<quote>…`
 * using the lowercase attribute name and no whitespace around `=`.
 *
 * @param html - HTML source text.
 * @param basePath - Base path to prefix; a trailing `/` is added if missing.
 * @param documentPath - Path of the document relative to the site root.
 * @returns The HTML with rewritable attribute values replaced.
 */
export function rewriteHtmlPaths(
	html: string,
	basePath: string,
	documentPath: string
): string {
	// Ensure base path ends with /
	const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/'

	let rewritten = html

	for (const attr of REWRITABLE_ATTRIBUTES) {
		// srcset holds a list of candidates; every other attribute holds one URL
		const rewriteValue = attr === 'srcset' ? rewriteSrcset : rewritePath

		for (const quote of ATTRIBUTE_QUOTES) {
			// Whitespace around '=' is bounded ({0,5}) and the value is a negated
			// character class, so the pattern cannot backtrack catastrophically.
			const attributeRegex = new RegExp(
				`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}${quote}([^${quote}]*)${quote}`,
				'gi'
			)

			rewritten = rewritten.replace(
				attributeRegex,
				(_match: string, value: string) =>
					`${attr}=${quote}${rewriteValue(value, normalizedBase, documentPath)}${quote}`
			)
		}
	}

	return rewritten
}

/**
 * Check whether content is HTML, based on its content type or file extension.
 *
 * @param filepath - File path; only the extension of the last path segment is
 *   inspected.
 * @param contentType - Optional MIME type, compared case-insensitively.
 * @returns `true` when the content type contains `text/html` or the file
 *   extension is `.html` / `.htm`.
 */
export function isHtmlContent(filepath: string, contentType?: string): boolean {
	if (contentType && contentType.toLowerCase().includes('text/html')) {
		return true
	}

	// Look at the file name only, so dots in directory names are ignored and an
	// extension-less file called "html" is not mistaken for an HTML document.
	const filename = filepath.slice(filepath.lastIndexOf('/') + 1)
	const lastDot = filename.lastIndexOf('.')
	if (lastDot === -1) {
		return false
	}

	const ext = filename.slice(lastDot + 1).toLowerCase()
	return ext === 'html' || ext === 'htm'
}