/**
 * Rewrites root-absolute and relative paths in HTML so that they resolve under a base path.
 * Only processes a fixed set of common HTML attributes and leaves external URLs, data URIs, etc. untouched.
 */
/**
 * Names of the HTML attributes whose quoted values are candidates for path
 * rewriting. `srcset` is listed last and is the only entry whose value may
 * hold several URLs.
 */
const REWRITABLE_ATTRIBUTES = [
  // Single-URL attributes
  'src',
  'href',
  'action',
  'data',
  'poster',
  // Multi-URL attribute (comma-separated image candidates)
  'srcset'
] as const
/**
 * Decide whether an attribute value is a local path that should be rewritten.
 *
 * Returns `false` for values that must be left untouched:
 * - empty or whitespace-only strings
 * - fragment-only (`#section`) and query-only (`?page=2`) references, which
 *   point at the current document rather than at a file path
 * - protocol-relative URLs (`//cdn.example.com/x.js`)
 * - anything that begins with a URI scheme (`http:`, `https:`, `data:`,
 *   `mailto:`, `javascript:`, ...)
 *
 * Everything else returns `true`: root-absolute paths (`/x`), dot-relative
 * paths (`./x`, `../x`) and bare relative paths (`x`, `dir/x`).
 *
 * @param path - Raw attribute value as it appears in the HTML.
 * @returns `true` when the value is a local path that may be rewritten.
 */
function shouldRewritePath(path: string): boolean {
  // Classify on the trimmed value so leading whitespace cannot hide a scheme.
  const candidate = path.trim()
  if (!candidate) return false

  // Same-document references carry no path component to rewrite.
  if (candidate.startsWith('#') || candidate.startsWith('?')) return false

  // Protocol-relative URLs point at another host.
  if (candidate.startsWith('//')) return false

  // A scheme is a letter followed by letters, digits, "+", "-" or "." and a
  // colon. The pattern is anchored at the start so that a colon appearing later
  // (e.g. in a query string such as "/search?t=12:30") is not mistaken for one.
  if (/^[a-z][a-z0-9+.-]*:/i.test(candidate)) return false

  return true
}
/**
 * Normalize a slash-separated path by resolving `.` and `..` segments and
 * collapsing repeated slashes.
 *
 * - A leading `/` is preserved; `..` never climbs above it (`/../a` -> `/a`).
 * - For relative paths, `..` segments that would climb above the start are
 *   dropped (`../a` -> `a`).
 * - A path that names a directory (ends in `/`, `/.` or `/..`) keeps a
 *   trailing slash (`a/b/` -> `a/b/`, `a/b/..` -> `a/`).
 *
 * @param path - Path to normalize; only `/` is treated as a separator.
 * @returns The normalized path (possibly the empty string).
 */
function normalizePath(path: string): string {
  const isAbsolute = path.startsWith('/')
  const segments: string[] = []

  for (const part of path.split('/')) {
    // Empty parts come from leading, trailing or doubled slashes.
    if (part === '' || part === '.') continue
    if (part === '..') {
      // pop() on an empty list is a no-op, which clamps at the top level.
      segments.pop()
      continue
    }
    segments.push(part)
  }

  let normalized = (isAbsolute ? '/' : '') + segments.join('/')

  // Keep the trailing slash when the input referred to a directory.
  const namesDirectory = /(^|\/)\.{0,2}$/.test(path)
  if (namesDirectory && segments.length > 0) {
    normalized += '/'
  }

  return normalized
}
/**
 * Return the directory portion of a file path, including the trailing slash.
 * A path without any `/` has no directory portion and yields `''`.
 *
 * e.g., "folder1/folder2/file.html" -> "folder1/folder2/"
 */
function getDirectory(filepath: string): string {
  // lastIndexOf returns -1 when there is no slash, so the cut point becomes 0
  // and the slice is empty.
  const cut = filepath.lastIndexOf('/') + 1
  return filepath.slice(0, cut)
}
/**
 * Rewrite a single path so that it resolves under `basePath`.
 *
 * - Values rejected by `shouldRewritePath`, and values that consist only of a
 *   query string and/or fragment, are returned unchanged.
 * - Root-absolute paths (`/file.js`) are re-rooted: `basePath + 'file.js'`.
 * - Relative paths (`./x`, `../x`, `x`) are resolved against the directory of
 *   `documentPath`, normalized, and then prefixed with `basePath`.
 *
 * Any `?query` / `#fragment` suffix is carried over verbatim and is never
 * passed through dot-segment normalization.
 *
 * @param path - Attribute value to rewrite.
 * @param basePath - Prefix for rewritten paths. It is concatenated as-is, so it
 *   is expected to end with `/` (`rewriteHtmlPaths` guarantees this).
 * @param documentPath - Path of the HTML document that contains `path`; its
 *   directory is the base for relative resolution.
 * @returns The rewritten path, or `path` itself when it must not be rewritten.
 */
function rewritePath(
  path: string,
  basePath: string,
  documentPath: string
): string {
  if (!shouldRewritePath(path)) {
    return path
  }

  // Separate the path proper from any ?query / #fragment suffix so that slashes
  // and dots inside the suffix are left alone.
  const suffixStart = path.search(/[?#]/)
  const pathname = suffixStart === -1 ? path : path.slice(0, suffixStart)
  const suffix = suffixStart === -1 ? '' : path.slice(suffixStart)

  // A bare query/fragment refers to the current document; nothing to rewrite.
  if (pathname === '') {
    return path
  }

  // Root-absolute: /file.js -> <base>file.js
  if (pathname.startsWith('/')) {
    return basePath + pathname.slice(1) + suffix
  }

  // Relative: resolve against the document's directory. normalizePath takes
  // care of "./" and "../" segments, so every relative form shares this path.
  const resolved = normalizePath(getDirectory(documentPath) + pathname)

  // If documentPath was itself root-absolute the resolved path starts with "/";
  // drop it so the result does not contain "<base>//...".
  return basePath + resolved.replace(/^\/+/, '') + suffix
}
/**
 * Rewrite every URL inside a `srcset` attribute value.
 *
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w".
 *
 * The value is tokenized the way the HTML specification describes rather than
 * by splitting on every comma:
 * - a URL is a run of non-whitespace characters, so commas inside a URL
 *   (for example in a `data:` URI) do not end the candidate;
 * - a URL that ends in a comma has no descriptor, and the trailing commas are
 *   separators rather than part of the URL;
 * - otherwise the descriptor runs up to the next comma outside parentheses.
 *
 * Candidates are emitted as `url descriptor` joined by `", "`; empty
 * candidates are dropped.
 *
 * @param srcset - Raw `srcset` attribute value.
 * @param basePath - Base prefix forwarded to `rewritePath`.
 * @param documentPath - Document path forwarded to `rewritePath`.
 * @returns The `srcset` value with each URL passed through `rewritePath`.
 */
function rewriteSrcset(
  srcset: string,
  basePath: string,
  documentPath: string
): string {
  const isSpace = (ch: string): boolean =>
    ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f'

  const candidates: string[] = []
  const length = srcset.length
  let pos = 0

  while (pos < length) {
    // Skip separators between candidates.
    while (
      pos < length &&
      (isSpace(srcset.charAt(pos)) || srcset.charAt(pos) === ',')
    ) {
      pos++
    }
    if (pos >= length) break

    // The URL is everything up to the next whitespace character.
    const urlStart = pos
    while (pos < length && !isSpace(srcset.charAt(pos))) {
      pos++
    }
    let url = srcset.slice(urlStart, pos)
    let descriptor = ''

    if (url.endsWith(',')) {
      // "a.png, b.png": the comma terminates a descriptor-less candidate.
      url = url.replace(/,+$/, '')
    } else {
      // Collect the descriptor up to the next top-level comma.
      const descriptorStart = pos
      let depth = 0
      while (pos < length) {
        const ch = srcset.charAt(pos)
        if (ch === '(') {
          depth++
        } else if (ch === ')' && depth > 0) {
          depth--
        } else if (ch === ',' && depth === 0) {
          break
        }
        pos++
      }
      descriptor = srcset.slice(descriptorStart, pos).trim()
    }

    if (url === '') continue

    const rewrittenUrl = rewritePath(url, basePath, documentPath)
    candidates.push(descriptor ? `${rewrittenUrl} ${descriptor}` : rewrittenUrl)
  }

  return candidates.join(', ')
}
/**
 * Rewrite absolute and relative paths in HTML content so they resolve under
 * `basePath`.
 *
 * Uses regular-expression matching on `name="value"` / `name='value'` pairs for
 * every name in `REWRITABLE_ATTRIBUTES`; no HTML parsing is performed, so
 * unquoted attribute values are not touched. `srcset` values are handled by
 * `rewriteSrcset`, all other attributes by `rewritePath`. Both quote styles
 * are supported for every attribute, and the original attribute-name casing,
 * spacing around `=`, and quote character are preserved in the output.
 *
 * @param html - HTML source text.
 * @param basePath - Prefix for rewritten paths; a trailing `/` is appended
 *   when missing.
 * @param documentPath - Path of the document the HTML belongs to, used to
 *   resolve relative paths.
 * @returns The HTML with matching attribute values rewritten.
 */
export function rewriteHtmlPaths(
  html: string,
  basePath: string,
  documentPath: string
): string {
  // Ensure base path ends with /
  const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/'
  let rewritten = html

  for (const attr of REWRITABLE_ATTRIBUTES) {
    // srcset may hold several URLs; every other attribute holds exactly one.
    const rewriteValue = attr === 'srcset' ? rewriteSrcset : rewritePath

    // Groups: 1 = attribute name as written, 2 = "=" with surrounding
    // spaces/tabs, 3 = double-quoted value, 4 = single-quoted value.
    // Whitespace runs are bounded ({0,5}) and values use negated character
    // classes, which keeps backtracking bounded.
    // NOTE(review): `\b` also matches the tail of hyphenated attribute names
    // (e.g. `data-src`, `data-action`). Left unchanged because existing
    // callers may rely on it — confirm whether that is intended.
    const attrRegex = new RegExp(
      `\\b(${attr})([ \\t]{0,5}=[ \\t]{0,5})(?:"([^"]*)"|'([^']*)')`,
      'gi'
    )

    rewritten = rewritten.replace(
      attrRegex,
      (
        _match: string,
        name: string,
        equals: string,
        doubleQuoted: string | undefined,
        singleQuoted: string | undefined
      ) => {
        // Exactly one of the two value groups participates in a match.
        const usesDoubleQuotes = doubleQuoted !== undefined
        const quote = usesDoubleQuotes ? '"' : "'"
        const value = usesDoubleQuotes ? doubleQuoted : singleQuoted || ''
        const rewrittenValue = rewriteValue(value, normalizedBase, documentPath)
        return `${name}${equals}${quote}${rewrittenValue}${quote}`
      }
    )
  }

  return rewritten
}
/**
 * Check whether content is HTML, based on its content type or its filename.
 *
 * @param filepath - File path; only the extension of its last `/`-separated
 *   segment is inspected.
 * @param contentType - Optional content-type value. Matched case-insensitively
 *   and may carry parameters (e.g. `text/html; charset=utf-8`).
 * @returns `true` when `contentType` contains `text/html`, or when the file
 *   name ends in `.html` or `.htm` (case-insensitive).
 */
export function isHtmlContent(filepath: string, contentType?: string): boolean {
  // Media type names are case-insensitive, so compare in lower case.
  if (contentType && contentType.toLowerCase().includes('text/html')) {
    return true
  }

  // Look at the final path segment only, and require an actual "." so that a
  // file literally named "html" is not mistaken for one with that extension.
  const fileName = filepath.slice(filepath.lastIndexOf('/') + 1).toLowerCase()
  const dotIndex = fileName.lastIndexOf('.')
  if (dotIndex === -1) {
    return false
  }

  const ext = fileName.slice(dotIndex + 1)
  return ext === 'html' || ext === 'htm'
}